def safe_put_bulk(self, url, bulk_json):
    """Bulk PUT `bulk_json` to `url`, working around unicode issues.

    The bulk is sent to `url` with `?refresh=true` appended. If sending it
    raises UnicodeEncodeError, the bulk is re-encoded as iso-8859-1
    (ignoring the characters that cannot be encoded) and sent again to the
    same target.

    Failures reported for individual items in the response are logged but
    never raised, to avoid stopping ocean uploading processes.

    :param url: URL the bulk is PUT to
    :param bulk_json: bulk payload; it must support `.encode()` for the
        iso-8859-1 fallback
    :returns: number of items in the response minus the ones reported as
        failed
    :raises: any exception raised by `raise_for_status()` on the response
    """
    headers = {"Content-Type": "application/x-ndjson"}
    # Both attempts use the same target, so the fallback request carries
    # the refresh parameter as well.
    target = url + '?refresh=true'

    try:
        res = self.requests.put(target, data=bulk_json, headers=headers)
        res.raise_for_status()
    except UnicodeEncodeError:
        # Related to body.encode('iso-8859-1'). mbox data
        logger.error("Encondig error ... converting bulk to iso-8859-1")
        bulk_json = bulk_json.encode('iso-8859-1', 'ignore')
        res = self.requests.put(target, data=bulk_json, headers=headers)
        res.raise_for_status()

    result = res.json()
    failed_items = []

    if result['errors']:
        failed_items = [item['index'] for item in result['items']
                        if 'error' in item['index']]
        # Several items may fail in a single bulk; only the first error
        # is reported.
        error = str(failed_items[0]['error'])
        logger.error("Failed to insert data to ES: %s, %s",
                     error, self.anonymize_url(url))

    inserted_items = len(result['items']) - len(failed_items)

    # Failed items are only logged above: no ELKError is raised, to avoid
    # stopping ocean uploading processes.
    logger.debug("%i items uploaded to ES (%s)",
                 inserted_items, self.anonymize_url(url))

    return inserted_items
def safe_put_bulk(self, url, bulk_json):
    """Bulk items to a target index `url`.

    The bulk is sent to `url` with `?refresh=true` appended. In case of
    UnicodeEncodeError, the bulk is encoded with iso-8859-1 (ignoring the
    characters that cannot be encoded) and sent again to the same target.

    Failures reported for individual items in the response are logged but
    never raised, to avoid stopping ocean uploading processes.

    :param url: target index where to bulk the items
    :param bulk_json: str representation of the items to upload
    :returns: number of items in the response minus the ones reported as
        failed
    :raises: any exception raised by `raise_for_status()` on the response
    """
    headers = {"Content-Type": "application/x-ndjson"}
    # Both attempts use the same target, so the fallback request carries
    # the refresh parameter as well.
    target = url + '?refresh=true'

    try:
        res = self.requests.put(target, data=bulk_json, headers=headers)
        res.raise_for_status()
    except UnicodeEncodeError:
        # Related to body.encode('iso-8859-1'). mbox data
        logger.warning("Encondig error ... converting bulk to iso-8859-1")
        bulk_json = bulk_json.encode('iso-8859-1', 'ignore')
        # Actually send the re-encoded bulk
        res = self.requests.put(target, data=bulk_json, headers=headers)
        res.raise_for_status()

    result = res.json()
    failed_items = []

    if result['errors']:
        failed_items = [
            item['index'] for item in result['items']
            if 'error' in item['index']
        ]
        # Several items may fail in a single bulk; only the first error
        # is reported.
        error = str(failed_items[0]['error'])
        # Lazy %-style arguments: the message is only built when the
        # record is actually emitted.
        logger.error("Failed to insert data to ES: %s, %s",
                     error, self.anonymize_url(url))

    inserted_items = len(result['items']) - len(failed_items)

    # Failed items are only logged above: no ELKError is raised, to avoid
    # stopping ocean uploading processes.
    logger.debug("%s items uploaded to ES (%s)",
                 inserted_items, self.anonymize_url(url))

    return inserted_items