def test_force_unicode(self):
    """Check force_unicode() against bytes, text, numbers and None."""
    # Each row: (input, expected text, optional failure message).
    cases = (
        (b'Hello \xe2\x98\x83', 'Hello ☃', None),
        # Don't mangle, it's already Unicode.
        ('Hello ☃', 'Hello ☃', None),
        (1, '1', "force_unicode() should convert ints"),
        (1.0, '1.0', "force_unicode() should convert floats"),
        (None, 'None', 'force_unicode() should convert None'),
    )
    for raw, expected, message in cases:
        self.assertEqual(force_unicode(raw), expected, message)
def test_force_unicode(self):
    """Verify that force_unicode() returns the expected text for each input kind."""

    def check(raw, expected, message=None):
        # Single comparison point so every case reads as one line below.
        self.assertEqual(force_unicode(raw), expected, message)

    check(b"Hello \xe2\x98\x83", "Hello ☃")
    # Don't mangle, it's already Unicode.
    check("Hello ☃", "Hello ☃")
    check(1, "1", "force_unicode() should convert ints")
    check(1.0, "1.0", "force_unicode() should convert floats")
    check(None, "None", "force_unicode() should convert None")
def test_safe_urlencode(self):
    """Encode with safe_urlencode(), decode again, and compare the query string."""

    def decoded(*args):
        # Arguments are forwarded untouched so the optional second
        # positional argument is only passed when a case supplies it.
        return force_unicode(unquote_plus(safe_urlencode(*args)))

    self.assertEqual(
        decoded({"test": "Hello ☃! Helllo world!"}),
        "test=Hello ☃! Helllo world!",
    )

    # Multi-valued fields are expected to repeat the key once per value.
    repeated = "test=Hello \u2603!&test=Helllo world!"
    self.assertEqual(
        decoded({"test": ["Hello ☃!", "Helllo world!"]}, True),
        repeated,
    )
    self.assertEqual(
        decoded({"test": ("Hello ☃!", "Helllo world!")}, True),
        repeated,
    )
def _to_python(self, value):
    """
    Converts values from Solr to native Python values.

    Numbers, lists and tuples are returned as-is. The strings ``'true'``
    and ``'false'`` become booleans. A string in which ``DATETIME_REGEX``
    finds a match becomes a ``datetime.datetime`` built from the named
    groups ``year``, ``month``, ``day``, ``hour``, ``minute`` and
    ``second``. Anything else is returned unchanged (byte strings are
    first passed through ``force_unicode``).
    """
    # NOTE(review): ``long`` and ``basestring`` are Python 2 builtins;
    # presumably this module targets Python 2 or defines them elsewhere -
    # confirm.
    if isinstance(value, (int, list, tuple, float, long, complex)):
        return value

    if value == 'true':
        return True
    if value == 'false':
        return False

    if isinstance(value, str):
        value = force_unicode(value)

    # Only text can carry a timestamp; everything else passes through.
    if not isinstance(value, basestring):
        return value

    match = DATETIME_REGEX.search(value)
    if not match:
        return value

    parts = dict((name, int(text)) for name, text in match.groupdict().items())
    return datetime.datetime(
        parts['year'],
        parts['month'],
        parts['day'],
        parts['hour'],
        parts['minute'],
        parts['second'],
    )
def put(self, data):
    '''
    Supports partial update of solr.

    ``data`` is an iterable of dicts, each with an ``'id'`` and a
    ``'fields'`` list whose entries carry ``'name'``, ``'command'`` and
    ``'value'``. ``None`` or an empty ``data`` is a no-op.
    '''
    if data is None or len(data) == 0:
        return

    # Update Solr: (Mostly from pysolr.Solr code.)
    # Generate the exact update command in xml -
    #  <add>
    #    <doc>
    #      <field name="id">1</field>
    #      <field name="memory_used" update="set">832</field>
    #    </doc>
    #  </add>
    root = ET.Element('add')
    for doc_update in data:
        doc_node = ET.SubElement(root, 'doc')

        id_node = ET.SubElement(doc_node, 'field', name='id')
        id_node.text = str(doc_update['id'])

        for field in doc_update['fields']:
            field_node = ET.SubElement(
                doc_node, 'field', name=field['name'], update=field['command'])
            field_node.text = str(field['value'])

    # ET.tostring() returns a bytestring; convert it back to Unicode.
    payload = pysolr.force_unicode(ET.tostring(root, encoding='utf-8'))

    try:
        solr = session.get_solr_interface(self.solr_url)
        solr._update(payload)
    except:
        # Log any failure, then re-raise it unchanged.
        LOG.exception('Failed to add to solr.')
        raise
def test_safe_urlencode(self):
    """Check the decoded output of safe_urlencode() for scalar and multi-valued fields."""
    # Each row: (positional arguments for safe_urlencode, expected decoded text).
    cases = [
        (
            ({'test': 'Hello ☃! Helllo world!'},),
            'test=Hello ☃! Helllo world!',
        ),
        (
            ({'test': ['Hello ☃!', 'Helllo world!']}, True),
            "test=Hello \u2603!&test=Helllo world!",
        ),
        (
            ({'test': ('Hello ☃!', 'Helllo world!')}, True),
            "test=Hello \u2603!&test=Helllo world!",
        ),
    ]
    for call_args, expected in cases:
        encoded = safe_urlencode(*call_args)
        self.assertEqual(force_unicode(unquote_plus(encoded)), expected)
def test__build_doc_with_sets(self):
    """Check that _build_doc() emits one field element per member of a set value."""
    doc = {
        "id": "doc_1",
        "title": "Set test doc",
        "tags": {"alpha", "beta"},
    }
    element = self.solr._build_doc(doc)
    doc_xml = force_unicode(ElementTree.tostring(element, encoding="utf-8"))

    expected_fields = (
        '<field name="id">doc_1</field>',
        '<field name="title">Set test doc</field>',
        '<field name="tags">alpha</field>',
        '<field name="tags">beta</field>',
    )
    for snippet in expected_fields:
        self.assertIn(snippet, doc_xml)

    # Total length guards against unexpected extra markup.
    self.assertEqual(len(doc_xml), 144)
def test__build_doc(self):
    """Check the XML produced by _build_doc() for a document with a non-ASCII title."""
    doc = {
        'id': 'doc_1',
        'title': 'Example doc ☃ 1',
        'price': 12.59,
        'popularity': 10,
    }
    doc_xml = force_unicode(ElementTree.tostring(self.solr._build_doc(doc), encoding='utf-8'))
    # assertIn reports both the needle and the haystack on failure, which
    # assertTrue(x in y) does not.
    self.assertIn('<field name="title">Example doc ☃ 1</field>', doc_xml)
    self.assertIn('<field name="id">doc_1</field>', doc_xml)
    # Total length guards against unexpected extra markup.
    self.assertEqual(len(doc_xml), 152)
def test__build_doc(self):
    """Serialize a simple document with _build_doc() and inspect the resulting XML."""
    doc = {"id": "doc_1", "title": "Example doc ☃ 1", "price": 12.59, "popularity": 10}
    element = self.solr._build_doc(doc)
    raw_xml = ElementTree.tostring(element, encoding="utf-8")
    doc_xml = force_unicode(raw_xml)

    for snippet in (
        '<field name="title">Example doc ☃ 1</field>',
        '<field name="id">doc_1</field>',
    ):
        self.assertIn(snippet, doc_xml)

    # Total length guards against unexpected extra markup.
    self.assertEqual(len(doc_xml), 152)
def test_safe_urlencode(self):
    """Compare decoded safe_urlencode() output with the expected query strings."""
    roundtrip = lambda *args: force_unicode(unquote_plus(safe_urlencode(*args)))

    single_value = {"test": "Hello ☃! Helllo world!"}
    self.assertEqual(roundtrip(single_value), "test=Hello ☃! Helllo world!")

    # Lists and tuples are expected to serialize identically.
    for values in (["Hello ☃!", "Helllo world!"], ("Hello ☃!", "Helllo world!")):
        self.assertEqual(
            roundtrip({"test": values}, True),
            "test=Hello \u2603!&test=Helllo world!",
        )
def test__build_doc_with_sets(self):
    """Check that _build_doc() emits one field element per member of a set value."""
    doc = {
        'id': 'doc_1',
        'title': 'Set test doc',
        'tags': {'alpha', 'beta'},
    }
    doc_xml = force_unicode(ElementTree.tostring(self.solr._build_doc(doc), encoding='utf-8'))
    # assertIn reports both the needle and the haystack on failure, which
    # assertTrue(x in y) does not.
    self.assertIn('<field name="id">doc_1</field>', doc_xml)
    self.assertIn('<field name="title">Set test doc</field>', doc_xml)
    self.assertIn('<field name="tags">alpha</field>', doc_xml)
    self.assertIn('<field name="tags">beta</field>', doc_xml)
    # Total length guards against unexpected extra markup.
    self.assertEqual(len(doc_xml), 144)
def add(solr, docs, dsId, commit=True, boost=None, commitWithin="1000", waitFlush=None, waitSearcher=None):
    """
    Adds or updates documents.

    Requires ``solr``, the object whose ``_build_doc`` method is used to
    turn each document into an XML element.

    Requires ``docs``, which is a list of dictionaries. Each key is the field
    name and each value is the value to index.

    Requires ``dsId``, which is passed through unchanged to ``update``.

    Optionally accepts ``commit``. Default is ``True``.

    Optionally accepts ``boost``. Default is ``None``.

    Optionally accepts ``commitWithin``. Default is ``"1000"``. A falsy
    value leaves the attribute off the ``<add>`` element.

    Optionally accepts ``waitFlush``. Default is ``None``.

    Optionally accepts ``waitSearcher``. Default is ``None``.

    Returns whatever ``update`` returns.

    Usage::

        add(solr, [
            {
                "id": "doc_1",
                "title": "A test document",
            },
            {
                "id": "doc_2",
                "title": "The Banana: Tasty or Dangerous?",
            },
        ], dsId)
    """
    message = ET.Element('add')

    if commitWithin:
        # XML attribute values must be text; str() lets callers pass an int.
        message.set('commitWithin', str(commitWithin))

    for doc in docs:
        message.append(solr._build_doc(doc, boost=boost))

    # ET.tostring() returns a bytestring; convert it back to Unicode.
    m = pysolr.force_unicode(ET.tostring(message, encoding='utf-8'))

    return update(solr, m, dsId, commit=commit, waitFlush=waitFlush, waitSearcher=waitSearcher)
def test__build_xml_doc_with_empty_values(self):
    """Check that _build_xml_doc() emits no empty field elements for empty values."""
    doc = {"id": "doc_1", "title": "", "price": None, "tags": []}
    element = self.solr._build_xml_doc(doc)
    doc_xml = force_unicode(ElementTree.tostring(element, encoding="utf-8"))

    # None of the empty-valued fields may appear as a self-closing element.
    for empty_name in ("title", "price", "tags"):
        self.assertNotIn('<field name="%s" />' % empty_name, doc_xml)

    self.assertIn('<field name="id">doc_1</field>', doc_xml)
    # Total length guards against unexpected extra markup.
    self.assertEqual(len(doc_xml), 41)
def test__build_xml_doc_with_empty_values_and_field_updates(self):
    """Check _build_xml_doc() output for empty values when fieldUpdates is supplied.

    Empty fields named in ``fieldUpdates`` are expected as null updates;
    the empty field not named there is expected to be absent.
    """
    doc = {"id": "doc_1", "title": "", "price": None, "tags": []}
    fieldUpdates = {"title": "set", "tags": "set"}

    element = self.solr._build_xml_doc(doc, fieldUpdates=fieldUpdates)
    doc_xml = force_unicode(ElementTree.tostring(element, encoding="utf-8"))

    # (assertion, snippet) pairs, evaluated in this order.
    checks = (
        (self.assertIn, '<field name="title" null="true" update="set" />'),
        (self.assertNotIn, '<field name="price" />'),
        (self.assertIn, '<field name="tags" null="true" update="set" />'),
        (self.assertIn, '<field name="id">doc_1</field>'),
    )
    for assertion, snippet in checks:
        assertion(snippet, doc_xml)

    # Total length guards against unexpected extra markup.
    self.assertEqual(len(doc_xml), 134)
def _send_request(self, method, path='', body=None, headers=None, files=None):
    """
    Copy and paste of the base (pysolr version 3.2.0) _send_request() method
    except for the resp = requests_method() line, which passes along the auth
    information.

    Sends ``body`` to the URL built from ``path`` using the method of
    ``self.session`` named by ``method`` (case-insensitive), and returns the
    response content converted to Unicode.

    Raises ``pysolr.SolrError`` when ``self.session`` has no such method, on
    a timeout or connection error, and when the response status is not 200.
    """
    url = self._create_full_url(path)
    method = method.lower()

    if headers is None:
        headers = {}

    # Only a short preview of the body is written to the log.
    if body is None:
        log_body = ''
    elif isinstance(body, str):
        log_body = body
    else:
        log_body = repr(body)

    self.log.debug("Starting request to '%s' (%s) with body '%s'...",
                   url, method, log_body[:10])
    start_time = time.time()

    try:
        # No getattr() default: a default would suppress the AttributeError
        # and leave a non-callable string to be invoked below.
        requests_method = getattr(self.session, method)
    except AttributeError:
        err = "Unable to send HTTP method '{0}'.".format(method)
        raise pysolr.SolrError(err)

    try:
        bytes_body = body

        if bytes_body is not None:
            bytes_body = pysolr.force_bytes(body)

        resp = requests_method(url, data=bytes_body, headers=headers,
                               files=files, timeout=self.timeout,
                               auth=self.auth)
    except requests.exceptions.Timeout as err:
        error_message = "Connection to server '%s' timed out: %s"
        self.log.error(error_message, url, err, exc_info=True)
        raise pysolr.SolrError(error_message % (url, err))
    except requests.exceptions.ConnectionError as err:
        error_message = "Failed to connect to server at '%s', are you " \
                        "sure that URL is correct? Checking it in a " \
                        "browser might help: %s"
        params = (url, err)
        self.log.error(error_message, *params, exc_info=True)
        raise pysolr.SolrError(error_message % params)

    end_time = time.time()
    self.log.info("Finished '%s' (%s) with body '%s' in %0.3f seconds.",
                  url, method, log_body[:10], end_time - start_time)

    if int(resp.status_code) != 200:
        error_message = self._extract_error(resp)
        # Attach the raw response to the log record for debugging.
        data = {
            'data': {
                'headers': resp.headers,
                'response': resp.content
            }
        }
        self.log.error(error_message, extra=data)
        raise pysolr.SolrError(error_message)

    return pysolr.force_unicode(resp.content)
def _send_request(self, method, path='', body=None, headers=None, files=None):
    """
    Copy and paste of the base (pysolr version 3.2.0) _send_request() method
    except for the resp = requests_method() line, which passes along the auth
    information.

    Sends ``body`` to the URL built from ``path`` using the method of
    ``self.session`` named by ``method`` (case-insensitive), and returns the
    response content converted to Unicode.

    Raises ``pysolr.SolrError`` when ``self.session`` has no such method, on
    a timeout or connection error, and when the response status is not 200.
    """
    url = self._create_full_url(path)
    method = method.lower()

    if headers is None:
        headers = {}

    # Only a short preview of the body is written to the log.
    if body is None:
        log_body = ''
    elif isinstance(body, str):
        log_body = body
    else:
        log_body = repr(body)

    self.log.debug("Starting request to '%s' (%s) with body '%s'...",
                   url, method, log_body[:10])
    start_time = time.time()

    try:
        # No getattr() default: a default would suppress the AttributeError
        # and leave a non-callable string to be invoked below.
        requests_method = getattr(self.session, method)
    except AttributeError:
        err = "Unable to send HTTP method '{0}'.".format(method)
        raise pysolr.SolrError(err)

    try:
        bytes_body = body

        if bytes_body is not None:
            bytes_body = pysolr.force_bytes(body)

        resp = requests_method(url, data=bytes_body, headers=headers,
                               files=files, timeout=self.timeout,
                               auth=self.auth)
    except requests.exceptions.Timeout as err:
        error_message = "Connection to server '%s' timed out: %s"
        self.log.error(error_message, url, err, exc_info=True)
        raise pysolr.SolrError(error_message % (url, err))
    except requests.exceptions.ConnectionError as err:
        error_message = "Failed to connect to server at '%s', are you " \
                        "sure that URL is correct? Checking it in a " \
                        "browser might help: %s"
        params = (url, err)
        self.log.error(error_message, *params, exc_info=True)
        raise pysolr.SolrError(error_message % params)

    end_time = time.time()
    self.log.info("Finished '%s' (%s) with body '%s' in %0.3f seconds.",
                  url, method, log_body[:10], end_time - start_time)

    if int(resp.status_code) != 200:
        error_message = self._extract_error(resp)
        # Attach the raw response to the log record for debugging.
        data = {'data': {'headers': resp.headers, 'response': resp.content}}
        self.log.error(error_message, extra=data)
        raise pysolr.SolrError(error_message)

    return pysolr.force_unicode(resp.content)
def _to_python(self, value): """ Converts values from Solr to native Python values. """ if value is None: return value if isinstance(value, (int, float, complex)): return value is_list = isinstance(value, (list, tuple)) values_processed = [] values_to_process = [] if isinstance(value, (list, tuple)): # Clone the value values_to_process = value[:] else: values_to_process.append(value) for value in values_to_process: if value == "true": values_processed.append(True) continue elif value == "false": values_processed.append(False) continue is_string = False if IS_PY3: if isinstance(value, bytes): value = force_unicode(value) if isinstance(value, str): is_string = True else: if isinstance(value, str): value = force_unicode(value) if isinstance(value, string_types): is_string = True if is_string: possible_datetime = DATETIME_REGEX.search(value) if possible_datetime: date_values = possible_datetime.groupdict() for dk, dv in date_values.items(): date_values[dk] = int(dv) values_processed.append( datetime.datetime( date_values["year"], date_values["month"], date_values["day"], date_values["hour"], date_values["minute"], date_values["second"], ) ) continue # elif ObjectId.is_valid(value): # values_processed.append(value) # continue elif is_valid_uuid(value, version=4): values_processed.append(value) continue elif is_valid_uuid(value, version=3): values_processed.append(value) continue elif is_valid_uuid(value, version=2): values_processed.append(value) continue elif is_valid_uuid(value, version=1): values_processed.append(value) continue try: # This is slightly gross but it's hard to tell otherwise what # the string's original type might have been. values_processed.append(ast.literal_eval(value)) except (ValueError, SyntaxError): # If it fails, continue on. pass values_processed.append(value) return values_processed if is_list else values_processed[0]
def test__build_doc(self):
    """Check the XML produced by _build_doc() for a document with a non-ASCII title."""
    doc = {"id": "doc_1", "title": "Example doc ☃ 1", "price": 12.59, "popularity": 10}
    doc_xml = force_unicode(ET.tostring(self.solr._build_doc(doc), encoding="utf-8"))
    # assertIn reports both the needle and the haystack on failure, which
    # assertTrue(x in y) does not.
    self.assertIn('<field name="title">Example doc ☃ 1</field>', doc_xml)
    self.assertIn('<field name="id">doc_1</field>', doc_xml)
    # Total length guards against unexpected extra markup.
    self.assertEqual(len(doc_xml), 152)
def test_safe_urlencode(self):
    """Check that safe_urlencode() output decodes back to the expected query strings."""
    expected_multi = "test=Hello \u2603!&test=Helllo world!"

    # Scalar value, default arguments.
    encoded = safe_urlencode({'test': 'Hello ☃! Helllo world!'})
    self.assertEqual(force_unicode(unquote_plus(encoded)), 'test=Hello ☃! Helllo world!')

    # List of values, with the second positional argument set.
    encoded = safe_urlencode({'test': ['Hello ☃!', 'Helllo world!']}, True)
    self.assertEqual(force_unicode(unquote_plus(encoded)), expected_multi)

    # Tuple of values, same expectation as the list.
    encoded = safe_urlencode({'test': ('Hello ☃!', 'Helllo world!')}, True)
    self.assertEqual(force_unicode(unquote_plus(encoded)), expected_multi)