Example #1
    def test_remove_failure_bulk(self):
        """The remove operation should fail when the underlying bulk call raises BulkIndexError."""
        doc_id = 'test_id'
        error = {'delete': {
            'status': 500, '_index': 'test_index', '_version': 1, 'found': True, '_id': doc_id
        }}
        with patch('search.elastic.bulk', side_effect=BulkIndexError('Simulated error', [error])):
            with self.assertRaises(BulkIndexError):
                self.searcher.remove([doc_id])
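
The test above builds the per-document error payload by hand. For reference, here is a minimal, hedged sketch of how BulkIndexError carries such payloads (assuming only the elasticsearch Python client: the exception is constructed with a message plus a list of error items, and that list can be read back from the exception as errors):

from elasticsearch.helpers import BulkIndexError

error = {'delete': {'status': 500, '_index': 'test_index', '_id': 'test_id'}}
try:
    raise BulkIndexError('1 document(s) failed to index.', [error])
except BulkIndexError as exc:
    # the list passed as the second argument comes back as exc.errors
    for item in exc.errors:
        op_type, info = next(iter(item.items()))
        print(op_type, info['status'])  # -> delete 500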
Example #2
    def flush(self):
        if len(self.buffer) > 0:
            results = helpers.parallel_bulk(client=self.client,
                                            actions=self.buffer,
                                            index=self.index_name,
                                            doc_type=self.experiment_name)
            errors = [status for success, status in results if not success]
            if errors:
                raise BulkIndexError(
                    "{} document(s) failed to index.".format(len(errors)),
                    errors)

            self.buffer.clear()
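
One subtlety in the flush example: parallel_bulk itself defaults to raise_on_error=True, so failed actions normally raise BulkIndexError inside the iteration rather than arriving as (False, status) pairs. Below is a hedged drop-in variant of the method above that makes the manual error collection effective by disabling the helper's own raising; the class name is illustrative, the attribute names are taken from the example, and doc_type is omitted because mapping types are gone in recent Elasticsearch versions:

from elasticsearch import helpers
from elasticsearch.helpers import BulkIndexError

class BufferedIndexer:  # illustrative holder for the attributes used in the example
    def __init__(self, client, index_name):
        self.client = client
        self.index_name = index_name
        self.buffer = []

    def flush(self):
        if self.buffer:
            results = helpers.parallel_bulk(
                client=self.client,
                actions=self.buffer,
                index=self.index_name,
                # let failures come back as (False, item) pairs instead of
                # raising inside the iteration
                raise_on_error=False,
            )
            errors = [status for success, status in results if not success]
            if errors:
                raise BulkIndexError(
                    "{} document(s) failed to index.".format(len(errors)),
                    errors)
            self.buffer.clear()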
Example #3
from operator import methodcaller

from elasticsearch.exceptions import TransportError
from elasticsearch.helpers import BulkIndexError


async def _process_bulk_chunk(client,
                              bulk_actions,
                              bulk_data,
                              raise_on_exception=True,
                              raise_on_error=True,
                              *args,
                              **kwargs):
    """
    Send a bulk request to elasticsearch and process the output.
    """
    # if raise on error is set, we need to collect errors per chunk before raising them
    errors = []

    try:
        # send the actual request
        resp = await client.bulk('\n'.join(bulk_actions) + '\n', *args,
                                 **kwargs)
    except TransportError as e:
        # default behavior - just propagate exception
        if raise_on_exception:
            raise e

        # if we are not propagating, mark all actions in current chunk as failed
        err_message = str(e)
        exc_errors = []

        for data in bulk_data:
            # collect all the information about failed actions
            op_type, action = data[0].copy().popitem()
            info = {
                "error": err_message,
                "status": e.status_code,
                "exception": e
            }
            if op_type != 'delete':
                info['data'] = data[1]
            info.update(action)
            exc_errors.append({op_type: info})

        # emulate standard behavior for failed actions
        if raise_on_error:
            raise BulkIndexError(
                '%i document(s) failed to index.' % len(exc_errors),
                exc_errors)
        else:
            for err in exc_errors:
                yield False, err
            return

    # go through request-response pairs and detect failures
    for data, (op_type, item) in zip(
        bulk_data, map(methodcaller('popitem'), resp['items'])
    ):
        ok = 200 <= item.get('status', 500) < 300
        if not ok and raise_on_error:
            # include original document source
            if len(data) > 1:
                item['data'] = data[1]
            errors.append({op_type: item})

        if ok or not errors:
            # if we are not just recording all errors to be able to raise
            # them all at once, yield items individually
            yield ok, {op_type: item}

    if errors:
        raise BulkIndexError('%i document(s) failed to index.' % len(errors),
                             errors)
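
Since _process_bulk_chunk is an async generator, the (ok, item) pairs it yields are consumed with async for. The following is a minimal sketch of a driver coroutine; client, bulk_actions and bulk_data are assumed to have been prepared by the caller (the higher-level bulk helpers normally do that expansion and serialization):

from elasticsearch.helpers import BulkIndexError

async def run_chunk(client, bulk_actions, bulk_data, raise_on_error=True):
    succeeded, failed = 0, 0
    try:
        async for ok, item in _process_bulk_chunk(
                client, bulk_actions, bulk_data,
                raise_on_error=raise_on_error):
            # with raise_on_error=False, failed actions are yielded as (False, item)
            if ok:
                succeeded += 1
            else:
                failed += 1
    except BulkIndexError as exc:
        # with raise_on_error=True (the default), the chunk's failures are raised
        # together at the end and are available on exc.errors
        failed += len(exc.errors)
    return succeeded, failed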
Example #4
    def add_events(
        self, company_id, events, worker, allow_locked_tasks=False
    ) -> Tuple[int, int, dict]:
        actions: List[dict] = []
        task_ids = set()
        task_iteration = defaultdict(lambda: 0)
        task_last_scalar_events = nested_dict(
            3, dict
        )  # task_id -> metric_hash -> variant_hash -> MetricEvent
        task_last_events = nested_dict(
            3, dict
        )  # task_id -> metric_hash -> event_type -> MetricEvent
        errors_per_type = defaultdict(int)
        invalid_iteration_error = f"Iteration number should not exceed {MAX_LONG}"
        valid_tasks = self._get_valid_tasks(
            company_id,
            task_ids={
                event["task"] for event in events if event.get("task") is not None
            },
            allow_locked_tasks=allow_locked_tasks,
        )

        for event in events:
            # normalize the event type: spaces are replaced with underscores below
            event_type = event.get("type")
            if event_type is None:
                errors_per_type["Event must have a 'type' field"] += 1
                continue

            event_type = event_type.replace(" ", "_")
            if event_type not in EVENT_TYPES:
                errors_per_type[f"Invalid event type {event_type}"] += 1
                continue

            task_id = event.get("task")
            if task_id is None:
                errors_per_type["Event must have a 'task' field"] += 1
                continue

            if task_id not in valid_tasks:
                errors_per_type["Invalid task id"] += 1
                continue

            event["type"] = event_type

            # @timestamp indicates the time the event is written, not when it happened
            event["@timestamp"] = es_factory.get_es_timestamp_str()

            # for backward compatibility: older events sent "ts" instead of "timestamp"
            if "ts" in event:
                event["timestamp"] = event.pop("ts")

            # set timestamp and worker if not sent
            if "timestamp" not in event:
                event["timestamp"] = es_factory.get_timestamp_millis()

            if "worker" not in event:
                event["worker"] = worker

            # force iter to be a long int
            iter = event.get("iter")
            if iter is not None:
                iter = int(iter)
                if iter > MAX_LONG or iter < MIN_LONG:
                    errors_per_type[invalid_iteration_error] += 1
                    continue
                event["iter"] = iter

            # older events used "values" to indicate an array; normalize to "value"
            if "values" in event:
                event["value"] = event["values"]
                del event["values"]

            event["metric"] = event.get("metric") or ""
            event["variant"] = event.get("variant") or ""

            index_name = get_index_name(company_id, event_type)
            es_action = {
                "_op_type": "index",  # overwrite if exists with same ID
                "_index": index_name,
                "_source": event,
            }

            # for "log" events, don't assign a deterministic _id - whatever is sent is written (not overwritten)
            if event_type != EventType.task_log.value:
                es_action["_id"] = self._get_event_id(event)
            else:
                es_action["_id"] = dbutils.id()

            task_ids.add(task_id)
            if (
                iter is not None
                and event.get("metric") not in self._skip_iteration_for_metric
            ):
                task_iteration[task_id] = max(iter, task_iteration[task_id])

            self._update_last_metric_events_for_task(
                last_events=task_last_events[task_id], event=event,
            )
            if event_type == EventType.metrics_scalar.value:
                self._update_last_scalar_events_for_task(
                    last_events=task_last_scalar_events[task_id], event=event
                )

            actions.append(es_action)

        plot_actions = [
            action["_source"]
            for action in actions
            if action["_source"]["type"] == EventType.metrics_plot.value
        ]
        if plot_actions:
            self.validate_and_compress_plots(
                plot_actions,
                validate_json=config.get("services.events.validate_plot_str", False),
                compression_threshold=config.get(
                    "services.events.plot_compression_threshold", 100_000
                ),
            )

        added = 0
        with translate_errors_context():
            if actions:
                chunk_size = 500
                with TimingContext("es", "events_add_batch"):
                    # TODO: replace it with helpers.parallel_bulk in the future once the parallel pool leak is fixed
                    with closing(
                        helpers.streaming_bulk(
                            self.es,
                            actions,
                            chunk_size=chunk_size,
                            # thread_count=8,
                            refresh=True,
                        )
                    ) as it:
                        for success, info in it:
                            if success:
                                added += 1
                            else:
                                errors_per_type["Error when indexing events batch"] += 1

                    remaining_tasks = set()
                    now = datetime.utcnow()
                    for task_id in task_ids:
                        # Update related tasks. For performance reasons we update all of them,
                        # not only those whose events were indexed successfully
                        updated = self._update_task(
                            company_id=company_id,
                            task_id=task_id,
                            now=now,
                            iter_max=task_iteration.get(task_id),
                            last_scalar_events=task_last_scalar_events.get(task_id),
                            last_events=task_last_events.get(task_id),
                        )

                        if not updated:
                            remaining_tasks.add(task_id)
                            continue

                    if remaining_tasks:
                        TaskBLL.set_last_update(
                            remaining_tasks, company_id, last_update=now
                        )

            # for backwards compatibility: streaming_bulk used to raise an exception on these, so emulate it here
            invalid_iterations_count = errors_per_type.get(invalid_iteration_error)
            if invalid_iterations_count:
                raise BulkIndexError(
                    f"{invalid_iterations_count} document(s) failed to index.",
                    [invalid_iteration_error],
                )

        if not added:
            raise errors.bad_request.EventsNotAdded(**errors_per_type)

        errors_count = sum(errors_per_type.values())
        return added, errors_count, errors_per_type
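
For readers who only need the indexing core of add_events, here is a hedged, self-contained reduction of the streaming_bulk loop above; the function name, client and index name are placeholders, and note that with streaming_bulk's default raise_on_error=True a failed action raises BulkIndexError instead of yielding success=False:

from contextlib import closing
from typing import List, Tuple

from elasticsearch import Elasticsearch, helpers

def index_events(es: Elasticsearch, index_name: str, events: List[dict]) -> Tuple[int, int]:
    # one bulk "index" action per event, mirroring the es_action dicts built above
    actions = [
        {"_op_type": "index", "_index": index_name, "_source": event}
        for event in events
    ]
    added, errors = 0, 0
    # closing() ensures the streaming_bulk generator is closed even if the loop
    # exits early
    with closing(
        helpers.streaming_bulk(es, actions, chunk_size=500, refresh=True)
    ) as it:
        for success, info in it:
            if success:
                added += 1
            else:
                errors += 1
    return added, errors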