def process_bulk_docs(self, docs):
    """Send a batch of docs to Elasticsearch in size-limited bulk payloads.

    Transform failures are logged per-doc and skipped. Returns ``True``
    when every payload was sent (or the batch was empty); ``False`` as
    soon as one payload cannot be delivered, which tells the reindexer
    to stop.
    """
    if not docs:
        return True
    pillow_logging.info("Processing batch of %s docs", len(docs))
    changes = [self._doc_to_change(doc) for doc in docs]
    error_collector = ErrorCollector()
    bulk_changes = build_bulk_payload(self.index_info, changes, self.doc_transform, error_collector)
    for change, exception in error_collector.errors:
        pillow_logging.error("Error processing doc %s: %s", change.id, exception)
    max_payload_size = pow(10, 8)  # ~ 100Mb
    payloads = prepare_bulk_payloads(bulk_changes, max_payload_size)
    if len(payloads) > 1:
        # lazy %-args instead of eager string interpolation in the log call
        pillow_logging.info("Payload split into %s parts", len(payloads))
    for payload in payloads:
        success = self._send_payload_with_retries(payload)
        if not success:
            # stop the reindexer if we're unable to send a payload to ES
            return False
    return True
def process_changes_chunk(self, changes_chunk):
    """Fetch, transform, and bulk-index one chunk of changes.

    Returns a ``(retry_changes, error_changes)`` pair: changes whose
    documents could not be fetched (to be retried later), and
    ``(change, exception)`` pairs for documents that failed either the
    transform step or the Elasticsearch bulk load.
    """
    with self._datadog_timing('bulk_extract'):
        bad_changes, docs = bulk_fetch_changes_docs(changes_chunk)

    with self._datadog_timing('bulk_transform'):
        # keep only changes that have a document and pass the doc filter
        changes_to_process = {
            change.id: change
            for change in changes_chunk
            if change.document and not self.doc_filter_fn(change.document)
        }
        retry_changes = list(bad_changes)

        error_collector = ErrorCollector()
        es_actions = build_bulk_payload(
            self.index_info, list(changes_to_process.values()),
            self.doc_transform_fn, error_collector)
        error_changes = error_collector.errors

    try:
        with self._datadog_timing('bulk_load'):
            _, errors = self.es_interface.bulk_ops(
                es_actions, raise_on_error=False, raise_on_exception=False)
    except Exception as e:
        # BUG FIX: the original format string had a "%s" placeholder with
        # no argument, so the message never rendered; pass the index alias.
        pillow_logging.exception("[%s] ES bulk load error", self.index_info.alias)
        # a transport-level failure affects the whole chunk
        error_changes.extend([(change, e) for change in changes_to_process.values()])
    else:
        # per-doc failures reported by the bulk API
        for change_id, error_msg in get_errors_with_ids(errors):
            error_changes.append((changes_to_process[change_id], BulkDocException(error_msg)))
    return retry_changes, error_changes
def process_bulk_docs(self, docs, progress_logger):
    """Transform and bulk-index a batch of docs.

    Deletion tombstones are skipped unless ``self.process_deletes`` is
    set. Returns ``True`` on success (including an empty batch), and
    ``False`` only when the bulk request fails with an unexpected error;
    known per-doc bulk index errors are logged but do not abort the run.
    """
    if not docs:
        return True
    pillow_logging.info("Processing batch of %s docs", len(docs))
    changes = [
        self._doc_to_change(doc) for doc in docs
        if self.process_deletes or not is_deletion(doc.get('doc_type'))
    ]
    error_collector = ErrorCollector()
    bulk_changes = build_bulk_payload(self.index_info, changes, self.doc_transform, error_collector)
    for change, exception in error_collector.errors:
        pillow_logging.error("Error processing doc %s: %s (%s)",
                             change.id, type(exception), exception)
    es_interface = ElasticsearchInterface(self.es)
    try:
        es_interface.bulk_ops(bulk_changes)
    except (ESBulkIndexError, ES2BulkIndexError, ES7BulkIndexError) as e:
        # partial failure: log per-doc errors, keep going
        pillow_logging.error("Bulk index errors\n%s", e.errors)
    except Exception:
        pillow_logging.exception("\tException sending payload to ES")
        return False
    return True
def process_bulk_docs(self, docs, progress_logger):
    """Transform a batch of docs and send them to Elasticsearch in bulk.

    Deleted changes are dropped unless ``self.process_deletes`` is set.
    Returns ``True`` on success (including an empty batch); ``False``
    when the bulk request fails with an unexpected error. Known bulk
    index errors are logged per-doc and do not abort the run.
    """
    if not docs:
        return True
    pillow_logging.info("Processing batch of %s docs", len(docs))
    # build each change once so the deletion flag is only inspected here
    changes = [
        change for change in map(self._doc_to_change, docs)
        if self.process_deletes or not change.deleted
    ]
    collector = ErrorCollector()
    payload = build_bulk_payload(changes, self.doc_transform, collector)
    for failed_change, err in collector.errors:
        pillow_logging.error("Error processing doc %s: %s (%s)",
                             failed_change.id, type(err), err)
    es = ElasticsearchInterface(self.es)
    try:
        es.bulk_ops(self.index_info.alias, self.index_info.type, payload)
    except BulkIndexError as e:
        pillow_logging.error("Bulk index errors\n%s", e.errors)
    except Exception as exc:
        pillow_logging.exception(
            "Error sending bulk payload to Elasticsearch: %s", exc)
        return False
    return True
def process_bulk_docs(self, docs):
    """Transform a batch of docs and send them to Elasticsearch in one bulk call.

    Transform failures are logged per-doc and skipped. Returns ``True``
    on success (including an empty batch), ``False`` when the bulk
    request to Elasticsearch fails.
    """
    if not docs:
        return True
    pillow_logging.info("Processing batch of %s docs", len(docs))
    changes = [self._doc_to_change(doc) for doc in docs]
    error_collector = ErrorCollector()
    bulk_changes = build_bulk_payload(self.index_info, changes, self.doc_transform, error_collector)
    for change, exception in error_collector.errors:
        pillow_logging.error("Error processing doc %s: %s (%s)",
                             change.id, type(exception), exception)
    es_interface = ElasticsearchInterface(self.es)
    try:
        es_interface.bulk_ops(bulk_changes)
    except Exception:
        pillow_logging.exception("\tException sending payload to ES")
        return False
    return True