Example #1
    def process_changes_chunk(self, changes_chunk):
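        # Process a chunk of changes in three stages: bulk-fetch the documents,
        # transform them into ES bulk actions, then index them in one bulk
        # request. Changes whose documents could not be fetched are returned for
        # retry; transform and load failures are returned with their exceptions.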
        with self._datadog_timing('bulk_extract'):
            bad_changes, docs = bulk_fetch_changes_docs(changes_chunk)

        with self._datadog_timing('bulk_transform'):
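            # Skip changes with no document or ones excluded by doc_filter_fn;
            # fetch failures (bad_changes) are queued for retry.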
            changes_to_process = {
                change.id: change
                for change in changes_chunk
                if change.document and not self.doc_filter_fn(change.document)
            }
            retry_changes = list(bad_changes)

            error_collector = ErrorCollector()
            es_actions = build_bulk_payload(self.index_info,
                                            list(changes_to_process.values()),
                                            self.doc_transform_fn,
                                            error_collector)
            error_changes = error_collector.errors

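        # Send all actions in a single bulk request; per-document failures are
        # reported in `errors` rather than raised, while a failure of the whole
        # request marks every change in the chunk as errored.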
        try:
            with self._datadog_timing('bulk_load'):
                _, errors = self.es_interface.bulk_ops(
                    es_actions, raise_on_error=False, raise_on_exception=False)
        except Exception as e:
            pillow_logging.exception("ES bulk load error")
            error_changes.extend([(change, e)
                                  for change in changes_to_process.values()])
        else:
            for change_id, error_msg in get_errors_with_ids(errors):
                error_changes.append((changes_to_process[change_id],
                                      BulkDocException(error_msg)))
        return retry_changes, error_changes
Example #2
    def test_get_docs(self):
        missing_case_ids = [uuid.uuid4().hex, uuid.uuid4().hex]
        changes = self._changes_from_ids(self.case_ids + missing_case_ids)
        bad_changes, result_docs = bulk_fetch_changes_docs(changes, 'domain')
        self.assertEqual(set(self.case_ids),
                         {doc['_id'] for doc in result_docs})
        self.assertEqual(set(missing_case_ids),
                         {change.id for change in bad_changes})
Example #3
    def _process_chunk_for_domain(self, domain, changes_chunk):
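        # For one domain: bulk-fetch the changed docs, run every table adapter's
        # filter over each doc, then bulk-delete stale rows, bulk-save new rows,
        # and queue docs that belong to asynchronous adapters.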
        adapters = list(self.table_adapters_by_domain[domain])
        changes_by_id = {change.id: change for change in changes_chunk}
        to_delete_by_adapter = defaultdict(list)
        rows_to_save_by_adapter = defaultdict(list)
        async_configs_by_doc_id = defaultdict(list)
        to_update = {change for change in changes_chunk if not change.deleted}
        with self._metrics_timer('extract'):
            retry_changes, docs = bulk_fetch_changes_docs(to_update, domain)
        # bulk_fetch_changes_docs returns lists; use a set so the delete and
        # save steps below can add failed changes with set.update()
        retry_changes = set(retry_changes)
        change_exceptions = []

        with self._metrics_timer('single_batch_transform'):
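            # For each fetched doc, decide per adapter whether to build rows to
            # save, queue asynchronous processing, or delete rows that no longer
            # match the adapter's filter.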
            for doc in docs:
                change = changes_by_id[doc['_id']]
                doc_subtype = change.metadata.document_subtype
                eval_context = EvaluationContext(doc)
                with self._metrics_timer('single_doc_transform'):
                    for adapter in adapters:
                        with self._per_config_metrics_timer('transform', adapter.config._id):
                            if adapter.config.filter(doc, eval_context):
                                if adapter.run_asynchronous:
                                    async_configs_by_doc_id[doc['_id']].append(adapter.config._id)
                                else:
                                    try:
                                        rows_to_save_by_adapter[adapter].extend(adapter.get_all_values(doc, eval_context))
                                    except Exception as e:
                                        change_exceptions.append((change, e))
                                    eval_context.reset_iteration()
                            elif (doc_subtype is None
                                    or doc_subtype in adapter.config.get_case_type_or_xmlns_filter()):
                                # Delete if the subtype is unknown or
                                # if the subtype matches our filters, but the full filter no longer applies
                                to_delete_by_adapter[adapter].append(doc)

        with self._metrics_timer('single_batch_delete'):
            # bulk delete by adapter
            to_delete = [{'_id': c.id} for c in changes_chunk if c.deleted]
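            # Each adapter deletes its own stale rows plus the rows for changes
            # marked as deleted.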
            for adapter in adapters:
                delete_docs = to_delete_by_adapter[adapter] + to_delete
                if not delete_docs:
                    continue
                with self._per_config_metrics_timer('delete', adapter.config._id):
                    try:
                        adapter.bulk_delete(delete_docs)
                    except Exception:
                        delete_ids = {doc['_id'] for doc in delete_docs}
                        retry_changes.update([c for c in changes_chunk if c.id in delete_ids])

        with self._metrics_timer('single_batch_load'):
            # bulk update by adapter
            for adapter, rows in rows_to_save_by_adapter.items():
                with self._per_config_metrics_timer('load', adapter.config._id):
                    try:
                        adapter.save_rows(rows)
                    except Exception:
                        retry_changes.update(to_update)

        if async_configs_by_doc_id:
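            # Record which configs should process each doc asynchronously
            # instead of in this chunk.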
            with self._metrics_timer('async_config_load'):
                doc_type_by_id = {
                    _id: changes_by_id[_id].metadata.document_type
                    for _id in async_configs_by_doc_id.keys()
                }
                AsyncIndicator.bulk_update_records(async_configs_by_doc_id, domain, doc_type_by_id)

        return retry_changes, change_exceptions