def test_fallback_search_solr(self):
        """Should work as test_fallback_search, but based on the native solr
           search utility """

        # Build a canned solr response backed by real portal_catalog brains
        # so the view has realistic results to return.
        pc = api.portal.get_tool("portal_catalog")
        mock_results = SolrResponse()
        mock_results.response = pc({"path": {"query": "/plone/en-de"}})
        mock_search = MagicMock(return_value=mock_results)
        mock_search.getManager = lambda: SolrConnectionManager(active=True)
        from zope.interface import alsoProvides
        from plone.indexer.interfaces import IIndexableObject
        alsoProvides(mock_search, IIndexableObject)
        # Swap the registered solr utilities for the mocks so the view
        # queries the fake search instead of a live solr instance.
        sm = self.portal.getSiteManager()
        sm.unregisterUtility(provided=ISearch)
        sm.unregisterUtility(provided=ISolrConnectionConfig)
        sm.registerUtility(component=SolrConnectionConfig(),
                           provided=ISolrConnectionConfig)
        sm.registerUtility(component=mock_search,
                           provided=ISearch)
        lf_search_view = self.portal.restrictedTraverse(
                "@@language-fallback-search")
        results = lf_search_view.search_solr("path_parents:/plone/events")
        self.assertEqual(set([x.getPath() for x in results]),
                         set(['/plone/en-de', '/plone/en-de/en-event', '/plone/en/notrans-event']))
        # The view must have extended the query with the language fallbacks.
        mock_search.search.assert_called_with(
                "path_parents:/plone/events +Language:en OR all OR de")
示例#2
0
    def test_fallback_search_solr(self):
        """Should work as test_fallback_search, but based on the native solr
           search utility """

        # Build a canned solr response backed by real portal_catalog brains.
        pc = api.portal.get_tool("portal_catalog")
        mock_results = SolrResponse()
        mock_results.response = pc({"path": {"query": "/plone/en-de"}})
        mock_search = MagicMock(return_value=mock_results)
        mock_search.getManager = lambda: SolrConnectionManager(active=True)
        from zope.interface import alsoProvides
        from plone.indexer.interfaces import IIndexableObject
        alsoProvides(mock_search, IIndexableObject)
        # Replace the registered solr utilities with the mocks so the view
        # talks to the fake search rather than a live solr instance.
        sm = self.portal.getSiteManager()
        sm.unregisterUtility(provided=ISearch)
        sm.unregisterUtility(provided=ISolrConnectionConfig)
        sm.registerUtility(component=SolrConnectionConfig(),
                           provided=ISolrConnectionConfig)
        sm.registerUtility(component=mock_search, provided=ISearch)
        lf_search_view = self.portal.restrictedTraverse(
            "@@language-fallback-search")
        results = lf_search_view.search_solr("path_parents:/plone/events")
        self.assertEqual(
            set([x.getPath() for x in results]),
            set([
                '/plone/en-de', '/plone/en-de/en-event',
                '/plone/en/notrans-event'
            ]))
        # The view must have extended the query with the language fallbacks.
        mock_search.search.assert_called_with(
            "path_parents:/plone/events +Language:en OR all OR de")
示例#3
0
 def testParseResultsActualResultCount(self):
     """actual_result_count is writable independently of the result list."""
     parsed = SolrResponse(getData("complex_xml_response.txt"))
     self.assertEqual(parsed.actual_result_count, 2)
     # override the reported count; the underlying result set is untouched
     parsed.actual_result_count = 1
     hits = parsed.response  # the result set is named 'response'
     self.assertEqual(parsed.actual_result_count, 1)
     self.assertEqual(len(hits), 2)
示例#4
0
    def cleanup(self, batch=1000):
        """ remove entries from solr that don't have a corresponding Zope
            object or have a different UID than the real object

            :param batch: number of solr documents fetched per query page
        """
        manager = queryUtility(ISolrConnectionManager)
        proc = SolrIndexProcessor(manager)
        conn = manager.getConnection()
        log = self.mklog(use_std_log=True)
        log('cleaning up solr index...\n')
        key = manager.getSchema().uniqueKey

        start = 0
        resp = SolrResponse(conn.search(q='*:*', rows=batch, start=start))
        res = resp.results()
        log('%s items in solr catalog\n' % resp.response.numFound)
        deleted = 0
        reindexed = 0
        while len(res) > 0:
            for flare in res:
                try:
                    ob = PloneFlare(flare).getObject()
                except Exception as err:
                    # stale entry: its object is gone from the ZODB
                    log('Error getting object, removing: %s (%s)\n' % (
                        flare['path_string'], err))
                    conn.delete(flare[key])
                    deleted += 1
                    continue
                if not IUUIDAware.providedBy(ob):
                    log('Object %s of type %s does not support uuids, skipping.\n' %
                        ('/'.join(ob.getPhysicalPath()), ob.meta_type))
                    continue
                uuid = IUUID(ob)
                if uuid != flare[key]:
                    # indexed under a stale UID: drop the bad document and,
                    # if no document exists under the real UID, reindex the
                    # object so it is not lost from the index entirely
                    log('indexed under wrong UID, removing: %s\n' %
                        flare['path_string'])
                    conn.delete(flare[key])
                    deleted += 1
                    realob_res = SolrResponse(conn.search(q='%s:%s' %
                                              (key, uuid))).results()
                    if len(realob_res) == 0:
                        log('no sane entry for last object, reindexing\n')
                        data, missing = proc.getData(ob)
                        prepareData(data)
                        if not missing:
                            boost = boost_values(ob, data)
                            conn.add(boost_values=boost, **data)
                            reindexed += 1
                        else:
                            log('  missing data, cannot index.\n')
            log('handled batch of %d items, committing\n' % len(res))
            conn.commit()
            # NOTE(review): advancing by `batch` after deleting documents
            # shifts the paged result window and may skip entries; confirm
            # whether repeated runs are expected for a complete cleanup.
            start += batch
            resp = SolrResponse(conn.search(q='*:*', rows=batch, start=start))
            res = resp.results()
        msg = 'solr cleanup finished, %s item(s) removed, %s item(s) reindexed\n' % (deleted, reindexed)
        log(msg)
        logger.info(msg)
示例#5
0
 def testParseComplexSearchResults(self):
     """Parse a canned solr XML response into typed python values."""
     complex_xml_response = getData('complex_xml_response.txt')
     response = SolrResponse(complex_xml_response)
     results = response.response     # the result set is named 'response'
     self.assertEqual(results.numFound, '2')
     self.assertEqual(results.start, '0')
     self.assertEqual(len(results), 2)
     first = results[0]
     self.assertEqual(first.cat, ['software', 'search'])
     self.assertEqual(len(first.features), 7)
     # mixed str/unicode values survive unmarshalling (python 2 fixture)
     self.assertEqual([type(x).__name__ for x in first.features],
         ['str'] * 6 + ['unicode'])
     self.assertEqual(first.id, 'SOLR1000')
     self.assertEqual(first.inStock, True)
     self.assertEqual(first.incubationdate_dt, DateTime('2006/01/17 GMT'))
     self.assertEqual(first.manu, 'Apache Software Foundation')
     self.assertEqual(first.popularity, 10)
     self.assertEqual(first.price, 0.0)
     # the response header carries solr status plus the echoed parameters
     headers = response.responseHeader
     self.assertEqual(headers['status'], 0)
     self.assertEqual(headers['QTime'], 0)
     self.assertEqual(headers['params']['indent'], 'on')
     self.assertEqual(headers['params']['rows'], '10')
     self.assertEqual(headers['params']['start'], '0')
     self.assertEqual(headers['params']['q'], 'id:[* TO *]')
     self.assertEqual(headers['params']['version'], '2.2')
示例#6
0
 def testConvertFacetResponse(self):
     """Facet counts from a solr response convert to display-ready dicts."""
     response = SolrResponse(getData("facet_xml_response.txt"))
     fields = response.facet_counts["facet_fields"]
     view = DummyView(request=TestRequest())
     info = convertFacets(fields, view=view)
     # the info should consist of 2 dicts with
     # `counts`, `name` and `title` keys
     self.assertEqual([sorted(i) for i in info],
                      [["counts", "name", "title"]] * 2)
     # next let's check the field names
     self.assertEqual([i["title"] for i in info], ["cat", "inStock"])
     # and the fields contents
     cat, inStock = info
     self.assertEqual(cat["title"], "cat")
     # counts are ordered by descending count, then by name
     self.assertEqual(
         [(c["name"], c["title"], c["count"]) for c in cat["counts"]],
         [
             ("search", "Title of Search", 1),
             ("software", "Title of Software", 1),
             ("electronics", "Title of Electronics", 0),
             ("monitor", "Title of Monitor", 0),
         ],
     )
     self.assertEqual(inStock["title"], "inStock")
     self.assertEqual([(c["name"], c["count"]) for c in inStock["counts"]],
                      [("true", 1)])
示例#7
0
 def testParseDateFacetSearchResults(self):
     """Date facets parse into per-day counts plus gap/end metadata."""
     facet_xml_response = getData("date_facet_xml_response.txt").decode("utf-8")
     response = SolrResponse(facet_xml_response)
     results = response.response  # the result set is named 'response'
     self.assertEqual(results.numFound, "42")
     self.assertEqual(results.start, "0")
     # rows=0 query: facet counts only, no documents returned
     self.assertEqual(len(results), 0)
     headers = response.responseHeader
     self.assertEqual(type(headers), type({}))
     self.assertEqual(headers["status"], 0)
     self.assertEqual(headers["QTime"], 5)
     self.assertEqual(headers["params"]["facet.date"], "timestamp")
     self.assertEqual(headers["params"]["facet.date.start"], "NOW/DAY-5DAYS")
     self.assertEqual(headers["params"]["facet.date.end"], "NOW/DAY+1DAY")
     self.assertEqual(headers["params"]["facet.date.gap"], "+1DAY")
     self.assertEqual(headers["params"]["rows"], "0")
     self.assertEqual(headers["params"]["facet"], "true")
     self.assertEqual(headers["params"]["indent"], "true")
     self.assertEqual(headers["params"]["q"], "*:*")
     counts = response.facet_counts
     self.assertEqual(type(counts), type({}))
     self.assertEqual(counts["facet_queries"], {})
     self.assertEqual(counts["facet_fields"], {})
     # one bucket per day within the configured date range
     timestamps = counts["facet_dates"]["timestamp"]
     self.assertEqual(timestamps["2007-08-11T00:00:00.000Z"], 1)
     self.assertEqual(timestamps["2007-08-12T00:00:00.000Z"], 5)
     self.assertEqual(timestamps["2007-08-13T00:00:00.000Z"], 3)
     self.assertEqual(timestamps["2007-08-14T00:00:00.000Z"], 7)
     self.assertEqual(timestamps["2007-08-15T00:00:00.000Z"], 2)
     self.assertEqual(timestamps["2007-08-16T00:00:00.000Z"], 16)
     self.assertEqual(timestamps["gap"], "+1DAY")
     self.assertEqual(
         timestamps["end"].ISO8601(), DateTime("2007-08-17 GMT").ISO8601()
     )
示例#8
0
 def testParseFacetSearchResults(self):
     """Field facets parse into nested facet_fields count dicts."""
     facet_xml_response = getData("facet_xml_response.txt").decode("utf-8")
     response = SolrResponse(facet_xml_response)
     results = response.response  # the result set is named 'response'
     self.assertEqual(results.numFound, "1")
     self.assertEqual(results.start, "0")
     # rows=0 query: facet counts only, no documents returned
     self.assertEqual(len(results), 0)
     headers = response.responseHeader
     self.assertEqual(type(headers), type({}))
     self.assertEqual(headers["status"], 0)
     self.assertEqual(headers["QTime"], 1)
     self.assertEqual(headers["params"]["facet.limit"], "-1")
     self.assertEqual(headers["params"]["rows"], "0")
     self.assertEqual(headers["params"]["facet"], "true")
     # repeated facet.field parameters come back as a list
     self.assertEqual(headers["params"]["facet.field"], ["cat", "inStock"])
     self.assertEqual(headers["params"]["indent"], "10")
     self.assertEqual(headers["params"]["q"], "solr")
     counts = response.facet_counts
     self.assertEqual(type(counts), type({}))
     self.assertEqual(counts["facet_queries"], {})
     self.assertEqual(counts["facet_fields"]["cat"]["electronics"], 0)
     self.assertEqual(counts["facet_fields"]["cat"]["monitor"], 0)
     self.assertEqual(counts["facet_fields"]["cat"]["search"], 1)
     self.assertEqual(counts["facet_fields"]["cat"]["software"], 1)
     self.assertEqual(counts["facet_fields"]["inStock"]["true"], 1)
示例#9
0
 def testParseComplexSearchResults(self):
     """Parse a canned solr XML response into typed python values."""
     complex_xml_response = getData("complex_xml_response.txt")
     response = SolrResponse(complex_xml_response)
     results = response.response  # the result set is named 'response'
     self.assertEqual(results.numFound, "2")
     self.assertEqual(response.actual_result_count, 2)
     self.assertEqual(results.start, "0")
     self.assertEqual(len(results), 2)
     first = results[0]
     self.assertEqual(first.cat, ["software", "search"])
     self.assertEqual(len(first.features), 7)
     # six.text_type keeps this assertion valid on both python 2 and 3
     self.assertEqual(
         [type(x).__name__ for x in first.features],
         ["str"] * 6 + [six.text_type.__name__],
     )
     self.assertEqual(first.id, "SOLR1000")
     self.assertEqual(first.inStock, True)
     self.assertEqual(
         first.incubationdate_dt.ISO8601(), DateTime("2006/01/17 GMT").ISO8601()
     )
     self.assertEqual(first.manu, "Apache Software Foundation")
     self.assertEqual(first.popularity, 10)
     self.assertEqual(first.price, 0.0)
     # the response header carries solr status plus the echoed parameters
     headers = response.responseHeader
     self.assertEqual(headers["status"], 0)
     self.assertEqual(headers["QTime"], 0)
     self.assertEqual(headers["params"]["indent"], "on")
     self.assertEqual(headers["params"]["rows"], "10")
     self.assertEqual(headers["params"]["start"], "0")
     self.assertEqual(headers["params"]["q"], "id:[* TO *]")
     self.assertEqual(headers["params"]["version"], "2.2")
示例#10
0
 def testParseFacetSearchResults(self):
     """Field facets parse into nested facet_fields count dicts."""
     facet_xml_response = getData('facet_xml_response.txt')
     response = SolrResponse(facet_xml_response)
     results = response.response     # the result set is named 'response'
     self.assertEqual(results.numFound, '1')
     self.assertEqual(results.start, '0')
     # rows=0 query: facet counts only, no documents returned
     self.assertEqual(len(results), 0)
     headers = response.responseHeader
     self.assertEqual(type(headers), type({}))
     self.assertEqual(headers['status'], 0)
     self.assertEqual(headers['QTime'], 1)
     self.assertEqual(headers['params']['facet.limit'], '-1')
     self.assertEqual(headers['params']['rows'], '0')
     self.assertEqual(headers['params']['facet'], 'true')
     # repeated facet.field parameters come back as a list
     self.assertEqual(headers['params']['facet.field'], ['cat', 'inStock'])
     self.assertEqual(headers['params']['indent'], '10')
     self.assertEqual(headers['params']['q'], 'solr')
     counts = response.facet_counts
     self.assertEqual(type(counts), type({}))
     self.assertEqual(counts['facet_queries'], {})
     self.assertEqual(counts['facet_fields']['cat']['electronics'], 0)
     self.assertEqual(counts['facet_fields']['cat']['monitor'], 0)
     self.assertEqual(counts['facet_fields']['cat']['search'], 1)
     self.assertEqual(counts['facet_fields']['cat']['software'], 1)
     self.assertEqual(counts['facet_fields']['inStock']['true'], 1)
示例#11
0
 def testParseDateFacetSearchResults(self):
     """Date facets parse into per-day counts plus gap/end metadata."""
     facet_xml_response = getData('date_facet_xml_response.txt')
     response = SolrResponse(facet_xml_response)
     results = response.response     # the result set is named 'response'
     self.assertEqual(results.numFound, '42')
     self.assertEqual(results.start, '0')
     # rows=0 query: facet counts only, no documents returned
     self.assertEqual(len(results), 0)
     headers = response.responseHeader
     self.assertEqual(type(headers), type({}))
     self.assertEqual(headers['status'], 0)
     self.assertEqual(headers['QTime'], 5)
     self.assertEqual(headers['params']['facet.date'], 'timestamp')
     self.assertEqual(headers['params']['facet.date.start'],
         'NOW/DAY-5DAYS')
     self.assertEqual(headers['params']['facet.date.end'], 'NOW/DAY+1DAY')
     self.assertEqual(headers['params']['facet.date.gap'], '+1DAY')
     self.assertEqual(headers['params']['rows'], '0')
     self.assertEqual(headers['params']['facet'], 'true')
     self.assertEqual(headers['params']['indent'], 'true')
     self.assertEqual(headers['params']['q'], '*:*')
     counts = response.facet_counts
     self.assertEqual(type(counts), type({}))
     self.assertEqual(counts['facet_queries'], {})
     self.assertEqual(counts['facet_fields'], {})
     # one bucket per day within the configured date range
     timestamps = counts['facet_dates']['timestamp']
     self.assertEqual(timestamps['2007-08-11T00:00:00.000Z'], 1)
     self.assertEqual(timestamps['2007-08-12T00:00:00.000Z'], 5)
     self.assertEqual(timestamps['2007-08-13T00:00:00.000Z'], 3)
     self.assertEqual(timestamps['2007-08-14T00:00:00.000Z'], 7)
     self.assertEqual(timestamps['2007-08-15T00:00:00.000Z'], 2)
     self.assertEqual(timestamps['2007-08-16T00:00:00.000Z'], 16)
     self.assertEqual(timestamps['gap'], '+1DAY')
     self.assertEqual(timestamps['end'], DateTime('2007-08-17 GMT'))
示例#12
0
    def test_suggestions_querystring_with_list_parameter(self):
        """Suggestion links preserve list-valued request parameters
        (e.g. multiple ``facet.field`` values) in the querystring.
        """
        portal = self.layer['portal']
        request = self.layer['request']

        # Setup browser layers
        notify(BeforeTraverseEvent(portal, request))

        request.form.update({'SearchableText': 'bidlung',
                             'facet.field': ['portal_type', 'review_state']})
        view = getMultiAdapter((portal, request), name=u'search')
        # Inject a canned spellcheck section into the view's solr response.
        view.solr_response = SolrResponse()
        view.solr_response.spellcheck = {}
        view.solr_response.spellcheck['suggestions'] = {
            'bidlung': {'endOffset': 246,
                        'numFound': 5,
                        'origFreq': 1,
                        'startOffset': 239,
                        'suggestion': [{'freq': 2704, 'word': 'bildung'},
                                       {'freq': 1, 'word': 'bidlungs'},
                                       {'freq': 1, 'word': 'bidung'},
                                       {'freq': 561, 'word': 'bildungs'},
                                       {'freq': 233, 'word': 'bislang'}]},
            'correctlySpelled': False,
        }

        suggestions = view.suggestions()
        # assertEqual: assertEquals is a deprecated alias
        self.assertEqual('bildung', suggestions[0][0])
        self.assertIn('&facet.field=portal_type&facet.field=review_state',
                      suggestions[0][1])
        self.assertIn('&SearchableText=bildung', suggestions[0][1])
示例#13
0
 def search(self, query, **parameters):
     """ perform a search with the given querystring and parameters

         :param query: querystring, or a dict whose values are joined
         :returns: a `SolrResponse` with the parsed results
         :raises SolrInactiveException: when no connection is available
     """
     start = time()
     config = queryUtility(ISolrConnectionConfig)
     manager = self.getManager()
     manager.setSearchTimeout()
     connection = manager.getConnection()
     if connection is None:
         raise SolrInactiveException
     # pep8 membership test (was `not 'rows' in parameters`)
     if 'rows' not in parameters:
         parameters['rows'] = config.max_results or ''
         logger.info(
             'falling back to "max_results" (%d) without a "rows" '
             'parameter: %r (%r)', config.max_results, query, parameters)
     if isinstance(query, dict):
         query = ' '.join(query.values())
     logger.debug('searching for %r (%r)', query, parameters)
     if 'sort' in parameters:  # issue warning for unknown sort indices
         index, order = parameters['sort'].split()
         schema = manager.getSchema() or {}
         field = schema.get(index, None)
         if field is None or not field.stored:
             logger.warning('sorting on non-stored attribute "%s"', index)
     response = connection.search(q=query, **parameters)
     results = SolrResponse(response)
     response.close()
     manager.setTimeout(None)
     # log queries that exceed the configured slow-query threshold (ms)
     elapsed = (time() - start) * 1000
     slow = config.slow_query_threshold
     if slow and elapsed >= slow:
         logger.info('slow query: %d/%d ms for %r (%r)',
                     results.responseHeader['QTime'], elapsed, query,
                     parameters)
     return results
示例#14
0
 def search(self, query, **parameters):
     """ perform a search with the given querystring and parameters """
     start = time()
     config = self.getConfig()
     manager = self.getManager()
     manager.setSearchTimeout()
     connection = manager.getConnection()
     if connection is None:
         raise SolrInactiveException
     if 'rows' not in parameters:
         parameters['rows'] = config.max_results or 10000000
         # Check if rows param is 0 for backwards compatibility. Before
         # Solr 4 'rows = 0' meant that there is no limitation. Solr 4
         # always expects a rows param > 0 though:
         # http://wiki.apache.org/solr/CommonQueryParameters#rows
         if parameters['rows'] == 0:
             parameters['rows'] = 10000000
         logger.debug(
             'falling back to "max_results" (%d) without a "rows" '
             'parameter: %r (%r)', config.max_results, query, parameters)
     # enable highlighting only when the caller requested it (hl=true)
     # and did not already pick the fields to highlight
     if getattr(config, 'highlight_fields', None):
         if parameters.get('hl', 'false') == 'true'\
                 and 'hl.fl' not in parameters:
             parameters['hl'] = 'true'
             parameters['hl.fl'] = config.highlight_fields or []
             parameters['hl.simple.pre'] =\
                 config.highlight_formatter_pre or ' '
             parameters['hl.simple.post'] =\
                 config.highlight_formatter_post or ' '
             parameters['hl.fragsize'] =\
                 getattr(config, 'highlight_fragsize', None) or 100
     # default field list: configured fields, or everything plus score
     if 'fl' not in parameters:
         if config.field_list:
             parameters['fl'] = ' '.join(config.field_list)
         else:
             parameters['fl'] = '* score'
     if isinstance(query, dict):
         query = u' '.join([safe_unicode(val)
                            for val in query.values()]).encode('utf-8')
     logger.debug('searching for %r (%r)', query, parameters)
     if 'sort' in parameters:  # issue warning for unknown sort indices
         index, order = parameters['sort'].split()
         schema = manager.getSchema() or {}
         field = schema.get(index, None)
         if field is None or not field.stored:
             logger.warning('sorting on non-stored attribute "%s"', index)
     response = connection.search(q=query, **parameters)
     results = SolrResponse(response)
     response.close()
     manager.setTimeout(None)
     # log queries that exceed the configured slow-query threshold (ms)
     elapsed = (time() - start) * 1000
     slow = config.slow_query_threshold
     if slow and elapsed >= slow:
         logger.info('slow query: %d/%d ms for %r (%r)',
                     results.responseHeader['QTime'], elapsed, query,
                     parameters)
     logger.debug('highlighting info: %s' %
                  getattr(results, 'highlighting', {}))
     return results
def solrSearchResults(request=None, **keywords):
    """ perform a query using solr after translating the passed in
        parameters with portal catalog semantics

        :param request: an HTTP request, a plain dict of query arguments,
            or ``None``; keyword arguments take precedence over values
            taken from the request/dict
        :returns: a `SolrResponse` (empty when no query could be built)
        :raises FallBackException: when the query should be handled by the
            portal catalog instead of solr
    """
    site = getSite()
    search = queryUtility(ISearch, context=site)
    config = queryUtility(ISolrConnectionConfig, context=site)

    if request is None:
        # try to get a request instance, so that flares can be adapted to
        # ploneflares and urls can be converted into absolute ones etc;
        # however, in this case any arguments from the request are ignored
        args = deepcopy(keywords)
        request = getattr(site, 'REQUEST', None)
    elif IHTTPRequest.providedBy(request):
        args = deepcopy(request.form)
        args.update(keywords)  # keywords take precedence
    else:
        assert isinstance(request, dict), request
        args = deepcopy(request)
        args.update(keywords)  # keywords take precedence
        # if request is a dict, we need the real request in order to
        # be able to adapt to plone flares
        request = getattr(site, 'REQUEST', args)

    if 'path' in args and 'navtree' in args['path']:
        raise FallBackException  # we can't handle navtree queries yet

    use_solr = args.get('use_solr', False)  # A special key to force Solr
    if not use_solr and config.required:
        # solr is only used when at least one configured "required"
        # parameter is present and non-empty
        required = set(config.required).intersection(args)
        if required:
            for key in required:
                if not args[key]:
                    raise FallBackException
        else:
            raise FallBackException

    query, params = search.buildQueryAndParameters(**args)

    if query != {}:
        __traceback_info__ = (query, params, args)
        response = search(query, **params)
    else:
        return SolrResponse()

    def wrap(flare):
        """ wrap a flare object with a helper class """
        adapter = queryMultiAdapter((flare, request), IFlare)
        # explicit `is None` check instead of the classic `x and a or b`
        # idiom, which would silently discard a falsy adapter
        return flare if adapter is None else adapter

    # fill in missing stored fields and wrap each flare for presentation
    schema = search.getManager().getSchema() or {}
    results = response.results()
    for idx, flare in enumerate(results):
        flare = wrap(flare)
        for missing in set(schema.stored).difference(flare):
            flare[missing] = MV
        # NOTE(review): the flare is wrapped a second time here -- the
        # adapter lookup is presumably idempotent; confirm.
        results[idx] = wrap(flare)
    padResults(results, **params)  # pad the batch
    return response
示例#16
0
 def search(self, query, **parameters):
     """ perform a search with the given querystring and parameters

         :param query: querystring, or a dict whose values are joined
         :returns: a `SolrResponse` with the parsed results
         :raises SolrInactiveException: when no connection is available
     """
     start = time()
     config = queryUtility(ISolrConnectionConfig)
     manager = self.getManager()
     manager.setSearchTimeout()
     connection = manager.getConnection()
     if connection is None:
         raise SolrInactiveException
     # pep8 membership tests throughout (was `not 'x' in parameters`)
     if 'rows' not in parameters:
         parameters['rows'] = config.max_results or ''
         logger.info(
             'falling back to "max_results" (%d) without a "rows" '
             'parameter: %r (%r)', config.max_results, query, parameters)
     # enable highlighting only when requested (hl=true) and the caller
     # did not already pick the fields to highlight
     if getattr(config, 'highlight_fields', None):
         if parameters.get('hl',
                           'false') == 'true' and 'hl.fl' not in parameters:
             parameters['hl'] = 'true'
             parameters['hl.fl'] = config.highlight_fields or []
             parameters[
                 'hl.simple.pre'] = config.highlight_formatter_pre or ' '
             parameters[
                 'hl.simple.post'] = config.highlight_formatter_post or ' '
             parameters['hl.fragsize'] = getattr(
                 config, 'highlight_fragsize', None) or 100
     # default field list: configured fields, or everything plus score
     if 'fl' not in parameters:
         if config.field_list:
             parameters['fl'] = ' '.join(config.field_list)
         else:
             parameters['fl'] = '* score'
     if isinstance(query, dict):
         query = ' '.join(query.values())
     logger.debug('searching for %r (%r)', query, parameters)
     if 'sort' in parameters:  # issue warning for unknown sort indices
         index, order = parameters['sort'].split()
         schema = manager.getSchema() or {}
         field = schema.get(index, None)
         if field is None or not field.stored:
             logger.warning('sorting on non-stored attribute "%s"', index)
     response = connection.search(q=query, **parameters)
     results = SolrResponse(response)
     response.close()
     manager.setTimeout(None)
     # log queries that exceed the configured slow-query threshold (ms)
     elapsed = (time() - start) * 1000
     slow = config.slow_query_threshold
     if slow and elapsed >= slow:
         logger.info('slow query: %d/%d ms for %r (%r)',
                     results.responseHeader['QTime'], elapsed, query,
                     parameters)
     logger.debug('highlighting info: %s' %
                  getattr(results, 'highlighting', {}))
     return results
示例#17
0
 def testConvertFacetResponse(self):
     """Facet counts from a solr response convert to display-ready dicts."""
     response = SolrResponse(getData('facet_xml_response.txt'))
     fields = response.facet_counts['facet_fields']
     info = convertFacets(fields, request=TestRequest())
     # the info should consist of 2 dicts with `field` and `counts` keys
     self.assertEqual([sorted(i) for i in info], [['counts', 'title']] * 2)
     # next let's check the field names
     self.assertEqual([i['title'] for i in info], ['cat', 'inStock'])
     # and the fields contents
     cat, inStock = info
     self.assertEqual(cat['title'], 'cat')
     # counts are ordered by descending count, then by name
     self.assertEqual([(c['name'], c['title'], c['count'])
                       for c in cat['counts']],
                      [('search', 'Title of Search', 1),
                       ('software', 'Title of Software', 1),
                       ('electronics', 'Title of Electronics', 0),
                       ('monitor', 'Title of Monitor', 0)])
     self.assertEqual(inStock['title'], 'inStock')
     self.assertEqual([(c['name'], c['count']) for c in inStock['counts']],
                      [('true', 1)])
示例#18
0
    def test_facets_order(self):
        """Facets are returned in the order configured on the connection
        config, regardless of the order in the solr response.
        """
        portal = self.layer['portal']
        request = self.layer['request']
        request.form.update({'facet_field': ['type', 'section', 'topics']})
        response = SolrResponse(getData('facets_response.xml'))
        view = SearchFacetsView(portal, request)
        view.kw = dict(results=response)

        config = queryUtility(ISolrConnectionConfig)
        config.facets = ['type', 'section', 'topics']
        # assertEqual: assertEquals is a deprecated alias
        facets = view.facets()
        self.assertEqual(['type', 'section', 'topics'],
            [facets[0]['title'], facets[1]['title'], facets[2]['title']],
            msg='Wrong facet order.')

        # reordering the config must reorder the returned facets
        config.facets = ['section', 'topics', 'type']
        facets = view.facets()
        self.assertEqual(['section', 'topics', 'type'],
            [facets[0]['title'], facets[1]['title'], facets[2]['title']],
            msg='Wrong facet order.')
示例#19
0
 def testParseSimpleSearchResults(self):
     """Parse a single-hit solr response into typed python values."""
     search_response = getData('search_response.txt')
     # strip the raw HTTP headers before the blank line separator
     response = SolrResponse(search_response.split('\n\n', 1)[1])
     results = response.response     # the result set is named 'response'
     self.assertEqual(results.numFound, '1')
     self.assertEqual(results.start, '0')
     match = results[0]
     self.assertEqual(len(results), 1)
     self.assertEqual(match.id, '500')
     self.assertEqual(match.name, 'python test doc')
     self.assertEqual(match.popularity, 0)
     self.assertEqual(match.sku, '500')
     self.assertEqual(match.timestamp,
         DateTime('2008-02-29 16:11:46.998 GMT'))
     # the response header carries solr status plus the echoed parameters
     headers = response.responseHeader
     self.assertEqual(headers['status'], 0)
     self.assertEqual(headers['QTime'], 0)
     self.assertEqual(headers['params']['wt'], 'xml')
     self.assertEqual(headers['params']['indent'], 'on')
     self.assertEqual(headers['params']['rows'], '10')
     self.assertEqual(headers['params']['q'], 'id:[* TO *]')
示例#20
0
 def testParseSimpleSearchResults(self):
     """Parse a single-hit solr response into typed python values."""
     search_response = getData("search_response.txt").decode("utf-8")
     # strip the raw HTTP headers before the blank line separator
     response = SolrResponse(search_response.split("\n\n", 1)[1])
     results = response.response  # the result set is named 'response'
     self.assertEqual(results.numFound, "1")
     self.assertEqual(results.start, "0")
     self.assertEqual(response.actual_result_count, 1)
     match = results[0]
     self.assertEqual(len(results), 1)
     self.assertEqual(match.id, "500")
     self.assertEqual(match.name, "python test doc")
     self.assertEqual(match.popularity, 0)
     self.assertEqual(match.sku, "500")
     self.assertEqual(
         match.timestamp.ISO8601(), DateTime("2008-02-29 16:11:46.998 GMT").ISO8601()
     )
     # the response header carries solr status plus the echoed parameters
     headers = response.responseHeader
     self.assertEqual(headers["status"], 0)
     self.assertEqual(headers["QTime"], 0)
     self.assertEqual(headers["params"]["wt"], "xml")
     self.assertEqual(headers["params"]["indent"], "on")
     self.assertEqual(headers["params"]["rows"], "10")
     self.assertEqual(headers["params"]["q"], "id:[* TO *]")
示例#21
0
    def test_suggestions(self):
        """The top-frequency spellcheck suggestion for the searched term is
        returned along with its replacement querystring.
        """
        portal = self.layer['portal']
        request = self.layer['request']

        # Setup browser layers
        notify(BeforeTraverseEvent(portal, request))

        request.form.update({'SearchableText': 'bidlung', })
        view = getMultiAdapter((portal, request), name=u'search')
        # Inject a canned spellcheck section into the view's solr response;
        # 'platform' was not searched for and must not influence the result.
        view.solr_response = SolrResponse()
        view.solr_response.spellcheck = {}
        view.solr_response.spellcheck['suggestions'] = {
            'bidlung': {'endOffset': 246,
                        'numFound': 5,
                        'origFreq': 1,
                        'startOffset': 239,
                        'suggestion': [{'freq': 2704, 'word': 'bildung'},
                                       {'freq': 1, 'word': 'bidlungs'},
                                       {'freq': 1, 'word': 'bidung'},
                                       {'freq': 561, 'word': 'bildungs'},
                                       {'freq': 233, 'word': 'bislang'}]},
            'platform': {'endOffset': 336,
                         'numFound': 5,
                         'origFreq': 9,
                         'startOffset': 328,
                         'suggestion': [{'freq': 557, 'word': 'plattform'},
                                        {'freq': 2, 'word': 'platforma'},
                                        {'freq': 2, 'word': 'platforme'},
                                        {'freq': 2, 'word': 'platforms'},
                                        {'freq': 7, 'word': 'plateforme'}]},
            'correctlySpelled': False,
        }

        suggestions = view.suggestions()
        # assertEqual: assertEquals is a deprecated alias
        self.assertEqual(suggestions[0][0], 'bildung')
        self.assertEqual(suggestions[0][1], '&SearchableText=bildung')
示例#22
0
    def sync(self, batch=1000):
        """Sync the Solr index with the portal catalog. Records contained
        in the catalog but not in Solr will be indexed and records not
        contained in the catalog will be removed.

        :param batch: number of processed items between intermediate
            commits (Solr flush + ZODB cache garbage collection)

        NOTE(review): Python 2 only -- relies on ``iterator.next()`` and
        on ``clock`` (``time.clock``?), both gone in Python 3; confirm
        before porting.
        """
        manager = queryUtility(ISolrConnectionManager)
        proc = SolrIndexProcessor(manager)
        conn = manager.getConnection()
        # the schema's unique key column links catalog records to Solr docs
        key = queryUtility(ISolrConnectionManager).getSchema().uniqueKey
        zodb_conn = self.context._p_jar
        catalog = getToolByName(self.context, 'portal_catalog')
        getIndex = catalog._catalog.getIndex
        modified_index = getIndex('modified')
        uid_index = getIndex(key)
        log = self.mklog()
        real = timer()  # real time
        lap = timer()  # real lap time (for intermediate commits)
        cpu = timer(clock)  # cpu time
        # get Solr status: fetch only the unique key and modification time
        # for every document currently in the index
        query = '+%s:[* TO *]' % key
        response = conn.search(q=query, rows=MAX_ROWS, fl='%s modified' % key)
        # avoid creating DateTime instances
        simple_unmarshallers = unmarshallers.copy()
        simple_unmarshallers['date'] = parse_date_as_datetime
        flares = SolrResponse(response, simple_unmarshallers)
        response.close()
        solr_results = {}
        solr_uids = set()

        def _utc_convert(value):
            # Collapse a utctimetuple to one minute-resolution integer.
            # Only used for equality comparison below, so the rough
            # "12 months / 31 days" factors are acceptable.
            t_tup = value.utctimetuple()
            return (((
                (t_tup[0] * 12 + t_tup[1]) * 31 + t_tup[2]) * 24 + t_tup[3]) *
                    60 + t_tup[4])

        for flare in flares:
            uid = flare[key]
            solr_uids.add(uid)
            solr_results[uid] = _utc_convert(flare['modified'])
        # get catalog status: map every catalog UID to its record id
        cat_results = {}
        cat_uids = set()
        for uid, rid in uid_index._index.items():
            cat_uids.add(uid)
            cat_results[uid] = rid
        # differences:
        # - in catalog but not in Solr -> must be indexed
        index = cat_uids.difference(solr_uids)
        # - only in Solr after removing catalog UIDs -> must be unindexed
        solr_uids.difference_update(cat_uids)
        unindex = solr_uids
        processed = 0
        flush = notimeout(lambda: conn.flush())

        def checkPoint():
            # intermediate commit: flush Solr and trim the ZODB cache so
            # memory use stays bounded during long runs
            msg = 'intermediate commit (%d items processed, ' \
                  'last batch in %s)...\n' % (processed, lap.next())
            log(msg)
            logger.info(msg)
            flush()
            zodb_conn.cacheGC()

        cpi = checkpointIterator(checkPoint, batch)
        # Look up objects -- bind catalog accessors to locals for speed
        uid_rid_get = cat_results.get
        rid_path_get = catalog._catalog.paths.get
        catalog_traverse = catalog.unrestrictedTraverse

        def lookup(uid,
                   rid=None,
                   uid_rid_get=uid_rid_get,
                   rid_path_get=rid_path_get,
                   catalog_traverse=catalog_traverse):
            # resolve a catalog UID (or record id) to the actual content
            # object; returns None when the object cannot be reached
            if rid is None:
                rid = uid_rid_get(uid)
            if not rid:
                return None
            if not isinstance(rid, int):
                # a set of record ids -- take the first one
                rid = tuple(rid)[0]
            path = rid_path_get(rid)
            if not path:
                return None
            try:
                obj = catalog_traverse(path)
            except AttributeError:
                return None
            return obj

        log('processing %d "unindex" operations next...\n' % len(unindex))
        op = notimeout(lambda uid: conn.delete(id=uid))
        for uid in unindex:
            obj = lookup(uid)
            if obj is None:
                # object really gone -- drop the stale Solr document
                op(uid)
                processed += 1
                cpi.next()
            else:
                log('not unindexing existing object %r.\n' % uid)
        log('processing %d "index" operations next...\n' % len(index))
        op = notimeout(lambda obj: proc.index(obj))
        for uid in index:
            obj = lookup(uid)
            if indexable(obj):
                op(obj)
                processed += 1
                cpi.next()
            else:
                log('not indexing unindexable object %r.\n' % uid)
            if obj is not None:
                # ghost the object again to keep the ZODB cache small
                obj._p_deactivate()
        log('processing "reindex" operations next...\n')
        op = notimeout(lambda obj: proc.reindex(obj))
        cat_mod_get = modified_index._unindex.get
        solr_mod_get = solr_results.get
        done = unindex.union(index)
        for uid, rid in cat_results.items():
            if uid in done:
                continue
            if isinstance(rid, IITreeSet):
                rid = rid.keys()[0]
            # reindex only when catalog and Solr modification times
            # (minute resolution, see _utc_convert) disagree
            if cat_mod_get(rid) != solr_mod_get(uid):
                obj = lookup(uid, rid=rid)
                if indexable(obj):
                    op(obj)
                    processed += 1
                    cpi.next()
                else:
                    log('not reindexing unindexable object %r.\n' % uid)
                if obj is not None:
                    obj._p_deactivate()
        conn.commit()
        log('solr index synced.\n')
        msg = 'processed %d object(s) in %s (%s cpu time).'
        msg = msg % (processed, real.next(), cpu.next())
        log(msg)
        logger.info(msg)
示例#23
0
 def search(self,
            query,
            wt="xml",
            sow="true",
            lowercase_operator="true",
            default_operator="AND",
            **parameters):
     """ perform a search with the given querystring and parameters

     :param query: Solr query string, or a dict whose values are joined
         with spaces into one query string
     :param wt: Solr response writer (output format)
     :param sow: "split on whitespace" query parser flag
     :param lowercase_operator: whether lowercase and/or act as operators
     :param default_operator: default boolean operator (Solr ``q.op``)
     :param parameters: any further Solr query parameters, passed through
     :returns: parsed ``SolrResponse``
     :raises SolrInactiveException: when no Solr connection is available
     """
     start = time()
     config = self.getConfig()
     manager = self.getManager()
     manager.setSearchTimeout()
     connection = manager.getConnection()
     if connection is None:
         raise SolrInactiveException
     parameters["wt"] = wt
     parameters["sow"] = sow  # split on whitespace
     parameters["lowercaseOperators"] = lowercase_operator
     parameters["q.op"] = default_operator
     # default the row limit from the configuration when the caller did
     # not ask for a specific number of rows
     if "rows" not in parameters:
         parameters["rows"] = config.max_results or 10000000
         # Check if rows param is 0 for backwards compatibility. Before
         # Solr 4 'rows = 0' meant that there is no limitation. Solr 4
         # always expects a rows param > 0 though:
         # http://wiki.apache.org/solr/CommonQueryParameters#rows
         # NOTE(review): this branch looks unreachable -- the
         # `or 10000000` above already maps a falsy max_results
         # (including 0) to 10000000; confirm before removing.
         if parameters["rows"] == 0:
             parameters["rows"] = 10000000
         logger.debug(
             'falling back to "max_results" (%d) without a "rows" '
             "parameter: %r (%r)",
             config.max_results,
             query,
             parameters,
         )
     # enable highlighting defaults only when the caller explicitly sent
     # hl=true and did not pick its own field list
     if getattr(config, "highlight_fields", None):
         if parameters.get("hl",
                           "false") == "true" and "hl.fl" not in parameters:
             parameters["hl"] = "true"
             parameters["hl.fl"] = config.highlight_fields or []
             parameters[
                 "hl.simple.pre"] = config.highlight_formatter_pre or " "
             parameters[
                 "hl.simple.post"] = config.highlight_formatter_post or " "
             parameters["hl.fragsize"] = (getattr(
                 config, "highlight_fragsize", None) or 100)
     # default the returned field list from configuration
     if "fl" not in parameters:
         if config.field_list:
             parameters["fl"] = " ".join(config.field_list)
         else:
             parameters["fl"] = "* score"
     if isinstance(query, dict):
         # dict query: join all values into one whitespace-separated string
         query = u" ".join([safe_unicode(val) for val in query.values()])
     logger.debug("searching for %r (%r)", query, parameters)
     if "sort" in parameters:  # issue warning for unknown sort indices
         index, order = parameters["sort"].split()
         schema = manager.getSchema() or {}
         field = schema.get(index, None)
         if field is None or not field.stored:
             logger.warning('sorting on non-stored attribute "%s"', index)
     response = connection.search(q=query, **parameters)
     results = SolrResponse(response)
     response.close()
     manager.setTimeout(None)
     elapsed = (time() - start) * 1000
     slow = config.slow_query_threshold
     # log both Solr-side (QTime) and wall-clock duration for slow queries
     if slow and elapsed >= slow:
         logger.info(
             "slow query: %d/%d ms for %r (%r)",
             results.responseHeader["QTime"],
             elapsed,
             query,
             parameters,
         )
     logger.debug("highlighting info: %s" %
                  getattr(results, "highlighting", {}))
     return results
示例#24
0
    def cleanup(self, batch=1000):
        """remove entries from solr that don't have a corresponding Zope
        object or have a different UID than the real object

        :param batch: page size used when scrolling through the Solr
            index; also the interval between commits
        """
        manager = queryUtility(ISolrConnectionManager)
        proc = SolrIndexProcessor(manager)
        conn = manager.getConnection()
        log = self.mklog(use_std_log=True)
        log("cleaning up solr index...\n")
        # the schema's unique key column (holds the object's UID)
        key = manager.getSchema().uniqueKey

        start = 0
        resp = SolrResponse(conn.search(q="*:*", rows=batch, start=start))
        res = resp.results()
        log("%s items in solr catalog\n" % resp.response.numFound)
        deleted = 0
        reindexed = 0
        # page through the entire Solr index, `batch` rows at a time
        while len(res) > 0:
            for flare in res:
                try:
                    ob = PloneFlare(flare).getObject()
                except Exception as err:
                    # traversal failed -- the indexed path is broken
                    log("Error getting object, removing: %s (%s)\n" %
                        (flare["path_string"], err))
                    conn.delete(flare[key])
                    deleted += 1
                    continue
                if ob is None:
                    log("Object not found, removing: %s\n" %
                        (flare["path_string"]))
                    conn.delete(flare[key])
                    deleted += 1
                    continue
                if not IUUIDAware.providedBy(ob):
                    no_skipping_msg = ("Object %s of type %s does not " +
                                       "support uuids, skipping.\n")
                    log(no_skipping_msg %
                        ("/".join(ob.getPhysicalPath()), ob.meta_type))
                    continue
                uuid = IUUID(ob)
                if uuid != flare[key]:
                    # stale entry: the object at this path has a different
                    # UID than the one indexed in Solr
                    log("indexed under wrong UID, removing: %s\n" %
                        flare["path_string"])
                    conn.delete(flare[key])
                    deleted += 1
                    # check whether the real object is still indexed under
                    # its actual UID; if not, reindex it now
                    realob_res = SolrResponse(
                        conn.search(q="%s:%s" % (key, uuid))).results()
                    if len(realob_res) == 0:
                        log("no sane entry for last object, reindexing\n")
                        data, missing = proc.getData(ob)
                        prepareData(data)
                        if not missing:
                            boost = boost_values(ob, data)
                            conn.add(boost_values=boost, **data)
                            reindexed += 1
                        else:
                            log("  missing data, cannot index.\n")
            log("handled batch of %d items, committing\n" % len(res))
            conn.commit()
            start += batch
            resp = SolrResponse(conn.search(q="*:*", rows=batch, start=start))
            res = resp.results()
        finished_msg = ("solr cleanup finished, %s item(s) removed, " +
                        "%s item(s) reindexed\n")
        msg = finished_msg % (deleted, reindexed)
        log(msg)
        logger.info(msg)
示例#25
0
 def test5(self):
     """Smoke test: parsing the stored response data must not raise."""
     parsed = SolrResponse(self.data)
示例#26
0
 def testParseQuirkyResponse(self):
     """No record in the quirky response may end up with an empty UID."""
     raw = getData("quirky_response.txt").decode("utf-8")
     parsed = SolrResponse(raw)
     # the result set is exposed under the attribute named 'response'
     missing_uids = [flare for flare in parsed.response if flare.UID == ""]
     self.assertEqual(missing_uids, [])
示例#27
0
 def results(self):
     """Parse the canned quirky Solr response and return its result set."""
     # the parsed result set lives under the attribute named 'response'
     return SolrResponse(getData('quirky_response.txt')).response
示例#28
0
 def testParseQuirkyResponse(self):
     """Parsing the quirky response must not yield records lacking a UID."""
     parsed = SolrResponse(getData('quirky_response.txt'))
     # the result set is exposed as the 'response' attribute
     blank = [record for record in parsed.response if record.UID == '']
     self.assertEqual(blank, [])