Пример #1
0
    def setUp(self):
        super(SolrTestCase, self).setUp()
        self.default_solr = SolrTornado('http://localhost:8983/solr/core0')
        # Short timeouts.
        self.solr = SolrTornado('http://localhost:8983/solr/core0', timeout=2)
        self.docs = [
            {
                'id': 'doc_1',
                'title': 'Example doc 1',
                'price': 12.59,
                'popularity': 10,
            },
            {
                'id': 'doc_2',
                'title': 'Another example ☃ doc 2',
                'price': 13.69,
                'popularity': 7,
            },
            {
                'id': 'doc_3',
                'title': 'Another thing',
                'price': 2.35,
                'popularity': 8,
            },
            {
                'id': 'doc_4',
                'title': 'doc rock',
                'price': 99.99,
                'popularity': 10,
            },
            {
                'id': 'doc_5',
                'title': 'Boring',
                'price': 1.12,
                'popularity': 2,
            },
        ]
        
        requests.post('http://localhost:8983/solr/core0/update',
                      headers={'Content-Type': 'text/xml'},
                      data='<delete><query>*:*</query></delete>')


        requests.post('http://localhost:8983/solr/core0/update/json?commit=true',
                      headers={'Content-type': 'application/json'},
                      data=json.dumps(self.docs))
Пример #2
0
class SolrTestCase(AsyncTestCase):

    def setUp(self):
        super(SolrTestCase, self).setUp()
        self.default_solr = SolrTornado('http://localhost:8983/solr/core0')
        # Short timeouts.
        self.solr = SolrTornado('http://localhost:8983/solr/core0', timeout=2)
        self.docs = [
            {
                'id': 'doc_1',
                'title': 'Example doc 1',
                'price': 12.59,
                'popularity': 10,
            },
            {
                'id': 'doc_2',
                'title': 'Another example ☃ doc 2',
                'price': 13.69,
                'popularity': 7,
            },
            {
                'id': 'doc_3',
                'title': 'Another thing',
                'price': 2.35,
                'popularity': 8,
            },
            {
                'id': 'doc_4',
                'title': 'doc rock',
                'price': 99.99,
                'popularity': 10,
            },
            {
                'id': 'doc_5',
                'title': 'Boring',
                'price': 1.12,
                'popularity': 2,
            },
        ]
        
        requests.post('http://localhost:8983/solr/core0/update',
                      headers={'Content-Type': 'text/xml'},
                      data='<delete><query>*:*</query></delete>')


        requests.post('http://localhost:8983/solr/core0/update/json?commit=true',
                      headers={'Content-type': 'application/json'},
                      data=json.dumps(self.docs))


    def tearDown(self):
        requests.post('http://localhost:8983/solr/core0/update',
                      headers={'Content-Type': 'text/xml'},
                      data='<delete><query>*:*</query></delete>')
        super(SolrTestCase, self).tearDown()

    def test_init(self):
        self.assertEqual(self.default_solr.url, 'http://localhost:8983/solr/core0')
        self.assertTrue(isinstance(self.default_solr.decoder, json.JSONDecoder))
        self.assertEqual(self.default_solr.timeout, 60)

        self.assertEqual(self.solr.url, 'http://localhost:8983/solr/core0')
        self.assertTrue(isinstance(self.solr.decoder, json.JSONDecoder))
        self.assertEqual(self.solr.timeout, 2)

    def test__create_full_url(self):
        # Nada.
        self.assertEqual(self.solr._create_full_url(path=''), 'http://localhost:8983/solr/core0')
        # Basic path.
        self.assertEqual(self.solr._create_full_url(path='pysolr_tests'), 'http://localhost:8983/solr/core0/pysolr_tests')
        # Leading slash (& making sure we don't touch the trailing slash).
        self.assertEqual(self.solr._create_full_url(path='/pysolr_tests/select/?whatever=/'), 'http://localhost:8983/solr/core0/pysolr_tests/select/?whatever=/')

    @gen_test
    def test__send_request(self):
        # Test a valid request.
        resp_body = yield self.solr._send_request('GET', 'select/?q=doc&wt=json')
        self.assertTrue('"numFound":3' in resp_body)

        # Test a lowercase method & a body.
        xml_body = '<add><doc><field name="id">doc_12</field><field name="title">Whee!</field></doc></add>'
        resp_body = yield self.solr._send_request('POST',
                                                  'update/?commit=true',
                                                  body=xml_body,
                                                  headers={
                                                      'Content-type': 'text/xml; charset=utf-8',
                                                  })
        self.assertTrue('<int name="status">0</int>' in resp_body)

        # Test a non-existent URL.
        old_url = self.solr.url
        self.solr.url = 'http://127.0.0.1:567898/wahtever'
        #self.assertRaises(SolrError, self.solr._send_request, 'get', 'select/?q=doc&wt=json')

        try:
            yield self.solr._send_request('get', 'select/?q=doc&wt=json')
        except SolrTornadoError:
            pass
        else:
            raise AssertionError, "did not raise SolrTornadoError"

        self.solr.url = old_url

    @gen_test
    def test__select(self):
        # Short params.
        resp_body = yield self.solr._select({'q': 'doc'})
        resp_data = json.loads(resp_body)
        self.assertEqual(resp_data['response']['numFound'], 3)

        # Long params.
        resp_body = yield self.solr._select({'q': 'doc' * 1024})
        resp_data = json.loads(resp_body)
        self.assertEqual(resp_data['response']['numFound'], 0)
        self.assertEqual(len(resp_data['responseHeader']['params']['q']), 3 * 1024)

    @gen_test
    def test__mlt(self):
        resp_body = yield self.solr._mlt({'q': 'id:doc_1', 'mlt.fl': 'title'})
        resp_data = json.loads(resp_body)
        self.assertEqual(resp_data['response']['numFound'], 0)

    @gen_test
    def test__suggest_terms(self):
        resp_body = yield self.solr._select({'terms.fl': 'title'})
        resp_data = json.loads(resp_body)
        self.assertEqual(resp_data['response']['numFound'], 0)

    @gen_test
    def test__update(self):
        xml_body = '<add><doc><field name="id">doc_12</field><field name="title">Whee!</field></doc></add>'
        resp_body = yield self.solr._update(xml_body)
        self.assertTrue('<int name="status">0</int>' in resp_body)

    def test__extract_error(self):
        class RubbishResponse(object):
            def __init__(self, content, headers=None):
                self.content = content
                self.headers = headers

                if self.headers is None:
                    self.headers = {}

        # Just the reason.
        resp_1 = RubbishResponse("We don't care.", {'reason': 'Something went wrong.'})
        self.assertEqual(self.solr._extract_error(resp_1), "[Reason: Something went wrong.]")

        # Empty reason.
        resp_2 = RubbishResponse("We don't care.", {'reason': None})
        self.assertEqual(self.solr._extract_error(resp_2), "[Reason: None]\nWe don't care.")

        # No reason. Time to scrape.
        resp_3 = RubbishResponse('<html><body><pre>Something is broke.</pre></body></html>', {'server': 'jetty'})
        self.assertEqual(self.solr._extract_error(resp_3), "[Reason: Something is broke.]")

    def test__scrape_response(self):
        # Tomcat.
        resp_1 = self.solr._scrape_response({'server': 'coyote'}, '<html><body><p><span>Error message</span><span>messed up.</span></p></body></html>')
        self.assertEqual(resp_1, ('messed up.', ''))

        # Jetty.
        resp_2 = self.solr._scrape_response({'server': 'jetty'}, '<html><body><pre>Something is broke.</pre></body></html>')
        self.assertEqual(resp_2, ('Something is broke.', u''))

        # Broken Tomcat.
        resp_3 = self.solr._scrape_response({'server': 'coyote'}, '<html><body><p>Really broken. Scraping Java-generated HTML sucks.</pre></body></html>')
        self.assertEqual(resp_3, (None, u'<div><body><p>Really broken. Scraping Java-generated HTML sucks.</p></body></div>'))

        # Other.
        resp_4 = self.solr._scrape_response({'server': 'crapzilla'}, '<html><head><title>Wow. Seriously weird.</title></head><body><pre>Something is broke.</pre></body></html>')
        self.assertEqual(resp_4, ('Wow. Seriously weird.', u''))

    def test__from_python(self):
        self.assertEqual(self.solr._from_python(datetime.date(2013, 1, 18)), '2013-01-18T00:00:00Z')
        self.assertEqual(self.solr._from_python(datetime.datetime(2013, 1, 18, 0, 30, 28)), '2013-01-18T00:30:28Z')
        self.assertEqual(self.solr._from_python(True), 'true')
        self.assertEqual(self.solr._from_python(False), 'false')
        self.assertEqual(self.solr._from_python(1), '1')
        self.assertEqual(self.solr._from_python(1.2), '1.2')
        self.assertEqual(self.solr._from_python(b'hello'), 'hello')
        self.assertEqual(self.solr._from_python('hello ☃'), 'hello ☃')

    def test__to_python(self):
        self.assertEqual(self.solr._to_python('2013-01-18T00:00:00Z'), datetime.datetime(2013, 1, 18))
        self.assertEqual(self.solr._to_python('2013-01-18T00:30:28Z'), datetime.datetime(2013, 1, 18, 0, 30, 28))
        self.assertEqual(self.solr._to_python('true'), True)
        self.assertEqual(self.solr._to_python('false'), False)
        self.assertEqual(self.solr._to_python(1), 1)
        self.assertEqual(self.solr._to_python(1.2), 1.2)
        self.assertEqual(self.solr._to_python(b'hello'), 'hello')
        self.assertEqual(self.solr._to_python('hello ☃'), 'hello ☃')
        self.assertEqual(self.solr._to_python(['foo', 'bar']), 'foo')
        self.assertEqual(self.solr._to_python(('foo', 'bar')), 'foo')
        self.assertEqual(self.solr._to_python('tuple("foo", "bar")'), 'tuple("foo", "bar")')

    def test__is_null_value(self):
        self.assertTrue(self.solr._is_null_value(None))
        self.assertTrue(self.solr._is_null_value(''))

        self.assertFalse(self.solr._is_null_value('Hello'))
        self.assertFalse(self.solr._is_null_value(1))

    @gen_test
    def test_search(self):
        results = yield self.solr.search('doc')
        self.assertEqual(len(results), 3)

        results = yield self.solr.search('example')
        self.assertEqual(len(results), 2)

        results = yield self.solr.search('nothing')
        self.assertEqual(len(results), 0)

        # Advanced options.
        results = yield self.solr.search('doc', **{
            'debug': 'true',
            'hl': 'true',
            'hl.fragsize': 8,
            'facet': 'on',
            'facet.field': 'popularity',
            'spellcheck': 'true',
            'spellcheck.collate': 'true',
            'spellcheck.count': 1,
            # TODO: Can't get these working in my test setup.
            # 'group': 'true',
            # 'group.field': 'id',
        })
        self.assertEqual(len(results), 3)
        self.assertTrue('explain' in results.debug)
        self.assertEqual(results.highlighting, {u'doc_4': {}, u'doc_2': {}, u'doc_1': {}})
        self.assertEqual(results.spellcheck, {})
        self.assertEqual(results.facets['facet_fields']['popularity'], ['10', 2, '7', 1, '2', 0, '8', 0])
        self.assertTrue(results.qtime is not None)
        # TODO: Can't get these working in my test setup.
        # self.assertEqual(results.grouped, '')

    def test_more_like_this(self):
        results = yield self.solr.more_like_this('id:doc_1', 'text')
        self.assertEqual(len(results), 0)

    @gen_test
    def test_suggest_terms(self):
        results = yield self.solr.suggest_terms('title', '')
        self.assertEqual(len(results), 1)
        self.assertEqual(results, {'title': [('doc', 3), ('another', 2), ('example', 2), ('1', 1), ('2', 1), ('boring', 1), ('rock', 1), ('thing', 1)]})

    def test__build_doc(self):
        doc = {
            'id': 'doc_1',
            'title': 'Example doc ☃ 1',
            'price': 12.59,
            'popularity': 10,
        }
        doc_xml = force_unicode(ET.tostring(self.solr._build_doc(doc), encoding='utf-8'))
        self.assertTrue('<field name="title">Example doc ☃ 1</field>' in doc_xml)
        self.assertTrue('<field name="id">doc_1</field>' in doc_xml)
        self.assertEqual(len(doc_xml), 152)

    @gen_test
    def test_add(self):
        self.assertEqual(len((yield self.solr.search('doc'))), 3)
        self.assertEqual(len((yield self.solr.search('example'))), 2)

        yield self.solr.add([
            {
                'id': 'doc_6',
                'title': 'Newly added doc',
            },
            {
                'id': 'doc_7',
                'title': 'Another example doc',
            },
        ])

        self.assertEqual(len((yield self.solr.search('doc'))), 5)
        self.assertEqual(len((yield self.solr.search('example'))), 3)

    @gen_test
    def test_add_with_boost(self):
        self.assertEqual(len((yield self.solr.search('doc'))), 3)

        yield self.solr.add([{'id': 'doc_6', 'title': 'Important doc'}],
                            boost={'title': 10.0})

        yield self.solr.add([{'id': 'doc_7', 'title': 'Spam doc doc'}],
                            boost={'title': 0})

        res = yield self.solr.search('doc')
        self.assertEqual(len(res), 5)
        self.assertEqual('doc_6', res.docs[0]['id'])

    @gen_test
    def test_delete(self):
        self.assertEqual(len((yield self.solr.search('doc'))), 3)
        yield self.solr.delete(id='doc_1')
        self.assertEqual(len((yield self.solr.search('doc'))), 2)
        yield self.solr.delete(q='price:[0 TO 15]')
        self.assertEqual(len((yield self.solr.search('doc'))), 1)

        self.assertEqual(len((yield self.solr.search('*:*'))), 1)
        yield self.solr.delete(q='*:*')
        self.assertEqual(len((yield self.solr.search('*:*'))), 0)

        # Need at least one.
        #self.assertRaises(ValueError, self.solr.delete)
        try:
            yield self.solr.delete()
        except ValueError:
            pass
        else:
            raise AssertionError, "should raise ValueError"
        # Can't have both.
        #self.assertRaises(ValueError, self.solr.delete, id='foo', q='bar')
        try:
            yield self.solr.delete(id='foo', q='bar')
        except ValueError:
            pass
        else:
            raise AssertionError, "should raise ValueError"

    @gen_test
    def test_commit(self):
        self.assertEqual(len((yield self.solr.search('doc'))), 3)
        yield self.solr.add([
            {
                'id': 'doc_6',
                'title': 'Newly added doc',
            }
        ], commit=False)
        self.assertEqual(len((yield self.solr.search('doc'))), 3)
        yield self.solr.commit()
        self.assertEqual(len((yield self.solr.search('doc'))), 4)


    @gen_test
    def test_optimize(self):
        # Make sure it doesn't blow up. Side effects are hard to measure. :/
        self.assertEqual(len((yield self.solr.search('doc'))), 3)
        yield self.solr.add([
            {
                'id': 'doc_6',
                'title': 'Newly added doc',
            }
        ], commit=False)
        self.assertEqual(len((yield self.solr.search('doc'))), 3)
        yield self.solr.optimize()
        self.assertEqual(len((yield self.solr.search('doc'))), 4)

    @gen_test
    def test_extract(self):
        fake_f = StringIO("""
            <html>
                <head>
                    <meta charset="utf-8">
                    <meta name="haystack-test" content="test 1234">
                    <title>Test Title ☃&#x2603;</title>
                </head>
                    <body>foobar</body>
            </html>
        """)
        fake_f.name = "test.html"
        extracted = yield self.solr.extract(fake_f)

        # Verify documented response structure:
        self.assertIn('contents', extracted)
        self.assertIn('metadata', extracted)

        self.assertIn('foobar', extracted['contents'])

        m = extracted['metadata']

        self.assertEqual([fake_f.name], m['stream_name'])

        self.assertIn('haystack-test', m, "HTML metadata should have been extracted!")
        self.assertEqual(['test 1234'], m['haystack-test'])

        # Note the underhanded use of a double snowman to verify both that Tika
        # correctly decoded entities and that our UTF-8 characters survived the
        # round-trip:
        self.assertEqual(['Test Title ☃☃'], m['title'])

    def test_full_url(self):
        self.solr.url = 'http://localhost:8983/solr/'
        full_url = self.solr._create_full_url(path='/update')

        # Make sure trailing and leading slashes do not collide:
        self.assertEqual(full_url, 'http://localhost:8983/solr/update')