def get_vector_metrics_per_iter(self, company_id, task_id, metric, variant):
    """
    Return (iterations, vectors) reported for the given task
    metric/variant, sorted by iteration number ascending.
    """
    event_type = EventType.metrics_vector
    if check_empty_data(self.es, company_id=company_id, event_type=event_type):
        return [], []

    es_req = {
        "size": 10000,
        "query": {
            "bool": {
                "must": [
                    {"term": {"task": task_id}},
                    {"term": {"metric": metric}},
                    {"term": {"variant": variant}},
                ]
            }
        },
        "_source": ["iter", "value"],
        "sort": ["iter"],
    }
    with translate_errors_context(), TimingContext("es", "task_stats_vector"):
        es_res = search_company_events(
            self.es, company_id=company_id, event_type=event_type, body=es_req
        )

    # Split the hit sources into two parallel lists
    sources = [hit["_source"] for hit in es_res["hits"]["hits"]]
    iterations = [src["iter"] for src in sources]
    vectors = [src["value"] for src in sources]
    return iterations, vectors
def get_last_iters(
    self, company_id: str, event_type: EventType, task_id: str, iters: int
):
    """Return up to `iters` most recent iteration numbers reported by the task."""
    if check_empty_data(self.es, company_id=company_id, event_type=event_type):
        return []

    # size=0: no hits needed, only the terms aggregation over iterations
    es_req: dict = {
        "size": 0,
        "query": {"bool": {"must": [{"term": {"task": task_id}}]}},
        "aggs": {
            "iters": {
                "terms": {
                    "field": "iter",
                    "size": iters,
                    "order": {"_key": "desc"},
                }
            }
        },
    }
    with translate_errors_context(), TimingContext("es", "task_last_iter"):
        es_res = search_company_events(
            self.es, company_id=company_id, event_type=event_type, body=es_req
        )

    if "aggregations" not in es_res:
        return []
    return [bucket["key"] for bucket in es_res["aggregations"]["iters"]["buckets"]]
def get_plot_image_urls(
    self, company_id: str, task_id: str, scroll_id: Optional[str]
) -> Tuple[Sequence[dict], Optional[str]]:
    """
    Return plot events (source-url field only) for the task, together with a
    scroll id for retrieving the next page. Pass the returned scroll id back
    to continue; an "empty" scroll id short-circuits to an empty result.
    """
    if scroll_id == self.empty_scroll:
        return [], None

    if scroll_id:
        # Continue a previously started scroll
        es_res = self.es.scroll(scroll_id=scroll_id, scroll="10m")
    else:
        if check_empty_data(self.es, company_id, EventType.metrics_plot):
            return [], None
        # Only plot events that actually carry source urls are relevant
        query = {
            "bool": {
                "must": [
                    {"term": {"task": task_id}},
                    {"exists": {"field": PlotFields.source_urls}},
                ]
            }
        }
        es_res = search_company_events(
            self.es,
            company_id=company_id,
            event_type=EventType.metrics_plot,
            body={
                "size": 1000,
                "_source": [PlotFields.source_urls],
                "query": query,
            },
            scroll="10m",
        )

    events, _, next_scroll_id = self._get_events_from_es_res(es_res)
    return events, next_scroll_id
def get_last_iterations_per_event_metric_variant(
    self,
    company_id: str,
    task_id: str,
    num_last_iterations: int,
    event_type: EventType,
    metric_variants: MetricVariants = None,
):
    """
    Return (metric, variant, iteration) tuples covering the last
    `num_last_iterations` iterations of each metric/variant reported
    by the task.

    Bug fix: get_task_plots in this file calls this method with a
    `metric_variants=` keyword that the previous signature did not accept
    (TypeError). The parameter is added with a default of None so existing
    callers are unaffected; when given, results are restricted to the
    requested metric/variant pairs.
    """
    if check_empty_data(self.es, company_id=company_id, event_type=event_type):
        return []

    must = [{"term": {"task": task_id}}]
    if metric_variants:
        # Same helper the sibling queries use for metric/variant filtering
        must.append(get_metric_variants_condition(metric_variants))

    es_req: dict = {
        "size": 0,
        "aggs": {
            "metrics": {
                "terms": {
                    "field": "metric",
                    "size": EventSettings.max_metrics_count,
                    "order": {"_key": "asc"},
                },
                "aggs": {
                    "variants": {
                        "terms": {
                            "field": "variant",
                            "size": EventSettings.max_variants_count,
                            "order": {"_key": "asc"},
                        },
                        "aggs": {
                            "iters": {
                                "terms": {
                                    "field": "iter",
                                    "size": num_last_iterations,
                                    "order": {"_key": "desc"},
                                }
                            }
                        },
                    }
                },
            }
        },
        "query": {"bool": {"must": must}},
    }
    with translate_errors_context(), TimingContext(
        "es", "task_last_iter_metric_variant"
    ):
        es_res = search_company_events(
            self.es, company_id=company_id, event_type=event_type, body=es_req
        )

    if "aggregations" not in es_res:
        return []
    return [
        (metric["key"], variant["key"], iter["key"])
        for metric in es_res["aggregations"]["metrics"]["buckets"]
        for variant in metric["variants"]["buckets"]
        for iter in variant["iters"]["buckets"]
    ]
def get_metrics_and_variants(
    self, company_id: str, task_id: str, event_type: EventType
):
    """
    Return a mapping {metric name: [variant names]} for all metrics
    the given task reported, both sorted alphabetically.
    """
    if check_empty_data(self.es, company_id=company_id, event_type=event_type):
        return {}

    es_req = {
        "size": 0,
        "aggs": {
            "metrics": {
                "terms": {
                    "field": "metric",
                    "size": EventSettings.max_metrics_count,
                    "order": {"_key": "asc"},
                },
                "aggs": {
                    "variants": {
                        "terms": {
                            "field": "variant",
                            "size": EventSettings.max_variants_count,
                            "order": {"_key": "asc"},
                        }
                    }
                },
            }
        },
        "query": {"bool": {"must": [{"term": {"task": task_id}}]}},
    }
    with translate_errors_context(), TimingContext(
        "es", "events_get_metrics_and_variants"
    ):
        es_res = search_company_events(
            self.es, company_id=company_id, event_type=event_type, body=es_req
        )

    # Bug fix: unlike the sibling aggregation queries, this method accessed
    # es_res["aggregations"] unguarded and raised KeyError when the response
    # carried no aggregations section
    if "aggregations" not in es_res:
        return {}

    metrics = {}
    for metric_bucket in es_res["aggregations"]["metrics"].get("buckets"):
        metric = metric_bucket["key"]
        metrics[metric] = [
            b["key"] for b in metric_bucket["variants"].get("buckets")
        ]
    return metrics
def scroll_task_events(
    self,
    company_id: str,
    task_id: str,
    order: str,
    event_type: EventType,
    batch_size=10000,
    scroll_id=None,
):
    """
    Page through all events of a task sorted by timestamp in the given
    order. Returns (events, next_scroll_id, total_events); pass the
    returned scroll id back to fetch the next page.
    """
    if scroll_id == self.empty_scroll:
        return [], scroll_id, 0

    if scroll_id:
        # Continue an existing scroll
        with translate_errors_context(), TimingContext("es", "task_log_events"):
            es_res = self.es.scroll(scroll_id=scroll_id, scroll="1h")
    else:
        if check_empty_data(self.es, company_id=company_id, event_type=event_type):
            return [], None, 0
        # ES caps a single page at 10000 hits
        es_req = {
            "size": min(batch_size, 10000),
            "sort": {"timestamp": {"order": order}},
            "query": {"bool": {"must": [{"term": {"task": task_id}}]}},
        }
        with translate_errors_context(), TimingContext("es", "scroll_task_events"):
            es_res = search_company_events(
                self.es,
                company_id=company_id,
                event_type=event_type,
                body=es_req,
                scroll="1h",
            )

    events, total_events, next_scroll_id = self._get_events_from_es_res(es_res)
    return events, next_scroll_id, total_events
def _query_aggregation_for_task_metrics(
    self,
    company_id: str,
    event_type: EventType,
    aggs: dict,
    task_id: str,
    metrics: Sequence[Tuple[str, str]],
) -> dict:
    """
    Run the passed aggregation over the task's events, optionally
    restricted to the given (metric, variant) pairs, and return the
    "aggregations" section of the ES response (None when absent).
    """
    must = [{"term": {"task": task_id}}]
    if metrics:
        # Each pair becomes a bool/must clause; pairs are OR-ed together
        pair_filters = [
            {
                "bool": {
                    "must": [
                        {"term": {"metric": metric}},
                        {"term": {"variant": variant}},
                    ]
                }
            }
            for metric, variant in metrics
        ]
        must.append({"bool": {"should": pair_filters}})

    es_req = {
        "size": 0,
        "query": {"bool": {"must": must}},
        "aggs": aggs,
    }
    with translate_errors_context(), TimingContext("es", "task_stats_scalar"):
        es_res = search_company_events(
            self.es,
            company_id=company_id,
            event_type=event_type,
            body=es_req,
        )
    return es_res.get("aggregations")
def get_last_iters(
    self,
    company_id: str,
    event_type: EventType,
    task_id: Union[str, Sequence[str]],
    iters: int,
) -> Mapping[str, Sequence]:
    """
    Return a mapping from task id to the list of its last `iters`
    iteration numbers (descending), for one or several tasks.
    """
    if check_empty_data(self.es, company_id=company_id, event_type=event_type):
        return {}

    task_ids = [task_id] if isinstance(task_id, str) else task_id
    es_req: dict = {
        "size": 0,
        "aggs": {
            "tasks": {
                # Bug fix: the ES terms aggregation defaults to 10 buckets,
                # so querying more than 10 tasks silently dropped tasks from
                # the result. Size the aggregation by the number of tasks.
                "terms": {"field": "task", "size": max(len(task_ids), 1)},
                "aggs": {
                    "iters": {
                        "terms": {
                            "field": "iter",
                            "size": iters,
                            "order": {"_key": "desc"},
                        }
                    }
                },
            }
        },
        "query": {"bool": {"must": [{"terms": {"task": task_ids}}]}},
    }
    with translate_errors_context(), TimingContext("es", "task_last_iter"):
        es_res = search_company_events(
            self.es,
            company_id=company_id,
            event_type=event_type,
            body=es_req,
        )

    if "aggregations" not in es_res:
        return {}
    return {
        tb["key"]: [ib["key"] for ib in tb["iters"]["buckets"]]
        for tb in es_res["aggregations"]["tasks"]["buckets"]
    }
def _get_task_metrics(
    self, task_id: str, company_id: str, event_type: EventType
) -> Sequence:
    """Return the alphabetically sorted names of all metrics the task reported."""
    es_req = {
        "size": 0,
        "query": {"bool": {"must": [{"term": {"task": task_id}}]}},
        "aggs": {
            "metrics": {
                "terms": {
                    "field": "metric",
                    "size": EventSettings.max_metrics_count,
                    "order": {"_key": "asc"},
                }
            }
        },
    }
    with translate_errors_context(), TimingContext("es", "_get_task_metrics"):
        es_res = search_company_events(
            self.es, company_id=company_id, event_type=event_type, body=es_req
        )

    buckets = safe_get(es_res, "aggregations/metrics/buckets", default=[])
    return [bucket["key"] for bucket in buckets]
def _get_task_metric_events(
    self,
    metric: MetricScrollState,
    company_id: str,
    iter_count: int,
    navigate_earlier: bool,
) -> Tuple:
    """
    Return task metric events grouped by iterations.
    Updates the metric scroll state (last_min_iter/last_max_iter) in place.
    Returns a (task, metric name, iterations) tuple where iterations is a
    list of {"iter": ..., "events": [...]} dicts sorted by iteration desc.
    """
    if metric.last_max_iter is None:
        # the first fetch is always from the latest iteration to the earlier ones
        navigate_earlier = True

    must_conditions = [
        {"term": {"task": metric.task}},
        {"term": {"metric": metric.name}},
        {"exists": {"field": "url"}},
    ]
    must_not_conditions = []

    # Constrain the iteration range relative to the previous scroll position
    range_condition = None
    if navigate_earlier and metric.last_min_iter is not None:
        range_condition = {"lt": metric.last_min_iter}
    elif not navigate_earlier and metric.last_max_iter is not None:
        range_condition = {"gt": metric.last_max_iter}
    if range_condition:
        must_conditions.append({"range": {"iter": range_condition}})

    if navigate_earlier:
        """
        When navigating to earlier iterations consider only variants
        whose invalid iterations border is lower than our starting iteration.
        For these variants make sure that only events from the valid
        iterations are returned
        """
        if not metric.last_min_iter:
            variants = metric.variants
        else:
            variants = list(
                v
                for v in metric.variants
                if v.last_invalid_iteration is None
                or v.last_invalid_iteration < metric.last_min_iter
            )
            if not variants:
                return metric.task, metric.name, []
            must_conditions.append(
                {"terms": {"variant": list(v.name for v in variants)}}
            )
    else:
        """
        When navigating to later iterations all variants may be relevant.
        For the variants whose invalid border is higher than our starting
        iteration make sure that only events from valid iterations are
        returned
        """
        variants = list(
            v
            for v in metric.variants
            if v.last_invalid_iteration is not None
            and v.last_invalid_iteration > metric.last_max_iter
        )
        # Exclude events at or below each variant's invalid-iteration border
        variants_conditions = [
            {
                "bool": {
                    "must": [
                        {"term": {"variant": v.name}},
                        {"range": {"iter": {"lte": v.last_invalid_iteration}}},
                    ]
                }
            }
            for v in variants
            if v.last_invalid_iteration is not None
        ]
        if variants_conditions:
            must_not_conditions.append({"bool": {"should": variants_conditions}})

    # Aggregate per iteration -> per variant, bringing the top hits
    # (sorted by url desc) for each variant bucket
    es_req = {
        "size": 0,
        "query": {
            "bool": {"must": must_conditions, "must_not": must_not_conditions}
        },
        "aggs": {
            "iters": {
                "terms": {
                    "field": "iter",
                    "size": iter_count,
                    "order": {"_key": "desc" if navigate_earlier else "asc"},
                },
                "aggs": {
                    "variants": {
                        "terms": {
                            "field": "variant",
                            "size": EventSettings.max_variants_count,
                            "order": {"_key": "asc"},
                        },
                        "aggs": {
                            "events": {
                                "top_hits": {"sort": {"url": {"order": "desc"}}}
                            }
                        },
                    }
                },
            }
        },
    }
    with translate_errors_context(), TimingContext("es", "get_debug_image_events"):
        es_res = search_company_events(
            self.es,
            company_id=company_id,
            event_type=self.EVENT_TYPE,
            body=es_req,
        )
    if "aggregations" not in es_res:
        return metric.task, metric.name, []

    def get_iteration_events(variant_buckets: Sequence[dict]) -> Sequence:
        # Flatten the event hits from all variant buckets of one iteration
        return [
            ev["_source"]
            for v in variant_buckets
            for ev in dpath.get(v, "events/hits/hits")
        ]

    iterations = [
        {
            "iter": it["key"],
            "events": get_iteration_events(dpath.get(it, "variants/buckets")),
        }
        for it in dpath.get(es_res, "aggregations/iters/buckets")
    ]
    if not navigate_earlier:
        # Results are always returned in descending iteration order
        iterations.sort(key=itemgetter("iter"), reverse=True)
    if iterations:
        metric.last_max_iter = iterations[0]["iter"]
        metric.last_min_iter = iterations[-1]["iter"]

    # Commented for now since the last invalid iteration is calculated in the beginning
    # if navigate_earlier and any(
    #     variant.last_invalid_iteration is None for variant in variants
    # ):
    #     """
    #     Variants validation flags due to recycling can
    #     be set only on navigation to earlier frames
    #     """
    #     iterations = self._update_variants_invalid_iterations(variants, iterations)

    return metric.task, metric.name, iterations
def _get_task_metric_intervals(
    self,
    company_id: str,
    event_type: EventType,
    task_id: str,
    samples: int,
    field: str = "iter",
) -> Sequence[MetricInterval]:
    """
    Calculate interval per task metric variant so that the resulting
    amount of points does not exceed samples.
    Return the list of metric variant intervals as the following tuple:
    (metric, variant, interval, samples)
    """
    # Per metric -> per variant: count of events plus min/max of the
    # sampling field, which _build_metric_interval uses to derive the interval
    es_req = {
        "size": 0,
        "query": {"term": {"task": task_id}},
        "aggs": {
            "metrics": {
                "terms": {
                    "field": "metric",
                    "size": EventSettings.max_metrics_count,
                    "order": {"_key": "asc"},
                },
                "aggs": {
                    "variants": {
                        "terms": {
                            "field": "variant",
                            "size": EventSettings.max_variants_count,
                            "order": {"_key": "asc"},
                        },
                        "aggs": {
                            "count": {"value_count": {"field": field}},
                            "min_index": {"min": {"field": field}},
                            "max_index": {"max": {"field": field}},
                        },
                    }
                },
            }
        },
    }
    with translate_errors_context(), TimingContext("es", "task_stats_get_interval"):
        es_res = search_company_events(
            self.es,
            company_id=company_id,
            event_type=event_type,
            body=es_req,
        )

    aggs_result = es_res.get("aggregations")
    if not aggs_result:
        return []
    return [
        self._build_metric_interval(metric["key"], variant["key"], variant, samples)
        for metric in aggs_result["metrics"]["buckets"]
        for variant in metric["variants"]["buckets"]
    ]
def _init_metric_states_for_task(
    self, task_metrics: Tuple[str, dict], company_id: str
) -> Sequence[MetricState]:
    """
    Return metric scroll states for the task filled with the variant states
    for the variants that reported any debug images.

    task_metrics is a (task id, metrics filter) tuple; when the metrics
    filter is non-empty the query is restricted to those metric/variants.
    """
    task, metrics = task_metrics
    must = [{"term": {"task": task}}, {"exists": {"field": "url"}}]
    if metrics:
        must.append(get_metric_variants_condition(metrics))
    query = {"bool": {"must": must}}
    # Per metric -> per variant: the most recent event timestamp and the
    # single url bucket from the latest iteration with its two last iterations
    es_req: dict = {
        "size": 0,
        "query": query,
        "aggs": {
            "metrics": {
                "terms": {
                    "field": "metric",
                    "size": EventSettings.max_metrics_count,
                    "order": {"_key": "asc"},
                },
                "aggs": {
                    "last_event_timestamp": {"max": {"field": "timestamp"}},
                    "variants": {
                        "terms": {
                            "field": "variant",
                            "size": EventSettings.max_variants_count,
                            "order": {"_key": "asc"},
                        },
                        "aggs": {
                            "urls": {
                                "terms": {
                                    "field": "url",
                                    "order": {"max_iter": "desc"},
                                    "size": 1,  # we need only one url from the most recent iteration
                                },
                                "aggs": {
                                    "max_iter": {"max": {"field": "iter"}},
                                    "iters": {
                                        "top_hits": {
                                            "sort": {"iter": {"order": "desc"}},
                                            "size": 2,  # need two last iterations so that we can take
                                            # the second one as invalid
                                            "_source": "iter",
                                        }
                                    },
                                },
                            }
                        },
                    },
                },
            }
        },
    }
    with translate_errors_context(), TimingContext("es", "_init_metric_states"):
        es_res = search_company_events(
            self.es,
            company_id=company_id,
            event_type=self.EVENT_TYPE,
            body=es_req,
        )
    if "aggregations" not in es_res:
        return []

    def init_variant_state(variant: dict):
        """
        Return new variant state for the passed variant bucket.
        If the image urls get recycled then fill the last_invalid_iteration field
        """
        state = VariantState(variant=variant["key"])
        top_iter_url = dpath.get(variant, "urls/buckets")[0]
        iters = dpath.get(top_iter_url, "iters/hits/hits")
        if len(iters) > 1:
            # A second hit for the same url means the url was reused
            # (recycled) in an older iteration, which is therefore invalid
            state.last_invalid_iteration = dpath.get(iters[1], "_source/iter")
        return state

    return [
        MetricState(
            metric=metric["key"],
            timestamp=dpath.get(metric, "last_event_timestamp/value"),
            variants=[
                init_variant_state(variant)
                for variant in dpath.get(metric, "variants/buckets")
            ],
        )
        for metric in dpath.get(es_res, "aggregations/metrics/buckets")
    ]
def _get_events(
    self,
    event_type: EventType,
    company_id: str,
    task_id: str,
    batch_size: int,
    navigate_earlier: bool,
    key: ScalarKey,
    from_key_value: Optional[Any],
    metric_variants: MetricVariants = None,
) -> Tuple[Sequence[dict], int]:
    """
    Return up to 'batch size' events starting from the previous key-field value
    (timestamp or iter) either in the direction of earlier events
    (navigate_earlier=True) or in the direction of later events.
    If from_key_value is not set then start either from latest or earliest.
    For the last key-field value all the events are brought (even if the
    resulting size exceeds batch_size) so that events with this value will
    not be lost between the calls.
    Returns (events, total hit count).
    """
    query, must = self._get_initial_query_and_must(task_id, metric_variants)

    # retrieve the next batch of events
    es_req = {
        "size": batch_size,
        "query": query,
        "sort": {key.field: "desc" if navigate_earlier else "asc"},
    }
    # Bug fix: the previous truthiness check (`if from_key_value:`) skipped
    # search_after for falsy-but-valid key values such as iteration 0,
    # silently restarting pagination from the beginning
    if from_key_value is not None:
        es_req["search_after"] = [from_key_value]

    with translate_errors_context(), TimingContext("es", "get_task_events"):
        es_result = search_company_events(
            self.es,
            company_id=company_id,
            event_type=event_type,
            body=es_req,
            routing=task_id,
        )
        hits = es_result["hits"]["hits"]
        hits_total = es_result["hits"]["total"]["value"]
        if not hits:
            return [], hits_total

        events = [hit["_source"] for hit in hits]

        # retrieve the events that match the last event key value
        # but did not make it into the previous call due to batch_size limitation
        es_req = {
            "size": 10000,
            "query": {
                "bool": {
                    "must": must + [{"term": {key.field: events[-1][key.field]}}]
                }
            },
        }
        es_result = search_company_events(
            self.es,
            company_id=company_id,
            event_type=event_type,
            body=es_req,
            routing=task_id,
        )
        last_second_hits = es_result["hits"]["hits"]
        if not last_second_hits or len(last_second_hits) < 2:
            # if only one element is returned for the last key value
            # then it is already present in the events
            return events, hits_total

        already_present_ids = set(hit["_id"] for hit in hits)
        last_second_events = [
            hit["_source"]
            for hit in last_second_hits
            if hit["_id"] not in already_present_ids
        ]

    # return the list merged from original query results +
    # leftovers from the last key value
    return (
        [*events, *last_second_events],
        hits_total,
    )
def _get_task_metric_events(
    self,
    task_state: TaskScrollState,
    company_id: str,
    iter_count: int,
    navigate_earlier: bool,
) -> Tuple:
    """
    Return task metric events grouped by iterations.
    Updates the task scroll state (last_min_iter/last_max_iter) in place.
    Returns a (task, iterations) tuple where iterations is a list of
    {"iter": ..., "events": [...]} dicts sorted by iteration desc.
    """
    if not task_state.metrics:
        return task_state.task, []

    if task_state.last_max_iter is None:
        # the first fetch is always from the latest iteration to the earlier ones
        navigate_earlier = True

    must_conditions = [
        {"term": {"task": task_state.task}},
        {"terms": {"metric": [m.metric for m in task_state.metrics]}},
        {"exists": {"field": "url"}},
    ]
    # Constrain the iteration range relative to the previous scroll position
    range_condition = None
    if navigate_earlier and task_state.last_min_iter is not None:
        range_condition = {"lt": task_state.last_min_iter}
    elif not navigate_earlier and task_state.last_max_iter is not None:
        range_condition = {"gt": task_state.last_max_iter}
    if range_condition:
        must_conditions.append({"range": {"iter": range_condition}})

    # Aggregate per iteration -> per metric -> per variant, bringing the
    # top hits (sorted by url desc) for each variant bucket
    es_req = {
        "size": 0,
        "query": {"bool": {"must": must_conditions}},
        "aggs": {
            "iters": {
                "terms": {
                    "field": "iter",
                    "size": iter_count,
                    "order": {"_key": "desc" if navigate_earlier else "asc"},
                },
                "aggs": {
                    "metrics": {
                        "terms": {
                            "field": "metric",
                            "size": EventSettings.max_metrics_count,
                            "order": {"_key": "asc"},
                        },
                        "aggs": {
                            "variants": {
                                "terms": {
                                    "field": "variant",
                                    "size": EventSettings.max_variants_count,
                                    "order": {"_key": "asc"},
                                },
                                "aggs": {
                                    "events": {
                                        "top_hits": {
                                            "sort": {"url": {"order": "desc"}}
                                        }
                                    }
                                },
                            }
                        },
                    }
                },
            }
        },
    }
    with translate_errors_context(), TimingContext("es", "get_debug_image_events"):
        es_res = search_company_events(
            self.es,
            company_id=company_id,
            event_type=self.EVENT_TYPE,
            body=es_req,
        )
    if "aggregations" not in es_res:
        return task_state.task, []

    # Map (metric, variant) -> last invalid iteration from the scroll state
    invalid_iterations = {
        (m.metric, v.variant): v.last_invalid_iteration
        for m in task_state.metrics
        for v in m.variants
    }

    def is_valid_event(event: dict) -> bool:
        # Events from metric/variant pairs not tracked in the state are
        # dropped; tracked events must be newer than the invalid border
        key = event.get("metric"), event.get("variant")
        if key not in invalid_iterations:
            return False

        max_invalid = invalid_iterations[key]
        return max_invalid is None or event.get("iter") > max_invalid

    def get_iteration_events(it_: dict) -> Sequence:
        # Flatten valid event hits from all metric/variant buckets of one iteration
        return [
            ev["_source"]
            for m in dpath.get(it_, "metrics/buckets")
            for v in dpath.get(m, "variants/buckets")
            for ev in dpath.get(v, "events/hits/hits")
            if is_valid_event(ev["_source"])
        ]

    iterations = []
    for it in dpath.get(es_res, "aggregations/iters/buckets"):
        events = get_iteration_events(it)
        if events:
            iterations.append({"iter": it["key"], "events": events})

    if not navigate_earlier:
        # Results are always returned in descending iteration order
        iterations.sort(key=itemgetter("iter"), reverse=True)
    if iterations:
        task_state.last_max_iter = iterations[0]["iter"]
        task_state.last_min_iter = iterations[-1]["iter"]

    return task_state.task, iterations
def get_task_latest_scalar_values(
    self, company_id, task_id
) -> Tuple[Sequence[dict], int]:
    """
    Return the latest scalar values per metric/variant of the task together
    with the maximal last-event timestamp over all variants.
    Only events with value > 0 are considered (query_string filter below).
    """
    event_type = EventType.metrics_scalar
    if check_empty_data(self.es, company_id=company_id, event_type=event_type):
        return [], 0

    query = {
        "bool": {
            "must": [
                {"query_string": {"query": "value:>0"}},
                {"term": {"task": task_id}},
            ]
        }
    }
    es_req = {
        "size": 0,
        "query": query,
        "aggs": {
            "metrics": {
                "terms": {
                    "field": "metric",
                    "size": EventSettings.max_metrics_count,
                    "order": {"_key": "asc"},
                },
                "aggs": {
                    "variants": {
                        "terms": {
                            "field": "variant",
                            "size": EventSettings.max_variants_count,
                            "order": {"_key": "asc"},
                        },
                        "aggs": {
                            "last_value": {
                                "top_hits": {
                                    "docvalue_fields": ["value"],
                                    "_source": "value",
                                    "size": 1,
                                    "sort": [{"iter": {"order": "desc"}}],
                                }
                            },
                            # NOTE(review): field name "@timestamp" differs from
                            # the "timestamp" field used elsewhere in this file —
                            # confirm it exists in the index mapping
                            "last_timestamp": {"max": {"field": "@timestamp"}},
                            "last_10_value": {
                                "top_hits": {
                                    "docvalue_fields": ["value"],
                                    "_source": "value",
                                    "size": 10,
                                    "sort": [{"iter": {"order": "desc"}}],
                                }
                            },
                        },
                    }
                },
            }
        },
        "_source": {"excludes": []},
    }
    with translate_errors_context(), TimingContext(
        "es", "events_get_metrics_and_variants"
    ):
        es_res = search_company_events(
            self.es, company_id=company_id, event_type=event_type, body=es_req
        )

    metrics = []
    max_timestamp = 0
    for metric_bucket in es_res["aggregations"]["metrics"].get("buckets"):
        metric_summary = dict(name=metric_bucket["key"], variants=[])
        for variant_bucket in metric_bucket["variants"].get("buckets"):
            variant_name = variant_bucket["key"]
            last_value = variant_bucket["last_value"]["hits"]["hits"][0]["fields"][
                "value"
            ][0]
            # NOTE(review): although the aggregation requests 10 hits, only the
            # first hit's value is taken here — presumably the value at the
            # 10th-from-last iteration is not needed; verify against callers
            last_10_value = variant_bucket["last_10_value"]["hits"]["hits"][0][
                "fields"
            ]["value"][0]
            timestamp = variant_bucket["last_timestamp"]["value"]
            max_timestamp = max(timestamp, max_timestamp)
            metric_summary["variants"].append(
                dict(
                    name=variant_name,
                    last_value=last_value,
                    last_10_value=last_10_value,
                )
            )
        metrics.append(metric_summary)
    return metrics, max_timestamp
def get_task_events(
    self,
    company_id: str,
    task_id: str,
    event_type: EventType,
    metric=None,
    variant=None,
    last_iter_count=None,
    sort=None,
    size=500,
    scroll_id=None,
    no_scroll=False,
) -> TaskEventsResult:
    """
    Return a batch of task events, optionally filtered by metric/variant
    and restricted to the last `last_iter_count` iterations per task.
    Pass the returned scroll id to fetch the following batches.
    """
    if scroll_id == self.empty_scroll:
        return TaskEventsResult()

    if scroll_id:
        # Continue a previously started scroll
        with translate_errors_context(), TimingContext("es", "get_task_events"):
            es_res = self.es.scroll(scroll_id=scroll_id, scroll="1h")
    else:
        if check_empty_data(self.es, company_id=company_id, event_type=event_type):
            return TaskEventsResult()

        task_ids = [task_id] if isinstance(task_id, str) else task_id
        must = []
        if metric:
            must.append({"term": {"metric": metric}})
        if variant:
            must.append({"term": {"variant": variant}})

        if last_iter_count is None:
            must.append({"terms": {"task": task_ids}})
        else:
            # Restrict each task to its own last iterations
            tasks_iters = self.get_last_iters(
                company_id=company_id,
                event_type=event_type,
                task_id=task_ids,
                iters=last_iter_count,
            )
            should = [
                {
                    "bool": {
                        "must": [
                            {"term": {"task": task}},
                            {"terms": {"iter": last_iters}},
                        ]
                    }
                }
                for task, last_iters in tasks_iters.items()
                if last_iters
            ]
            if not should:
                return TaskEventsResult()
            must.append({"bool": {"should": should}})

        es_req = {
            "sort": sort if sort is not None else [{"timestamp": {"order": "asc"}}],
            "size": min(size, 10000),
            "query": {"bool": {"must": must}},
        }
        scroll_kwargs = {} if no_scroll else {"scroll": "1h"}
        with translate_errors_context(), TimingContext("es", "get_task_events"):
            es_res = search_company_events(
                self.es,
                company_id=company_id,
                event_type=event_type,
                body=es_req,
                ignore=404,
                **scroll_kwargs,
            )

    events, total_events, next_scroll_id = self._get_events_from_es_res(es_res)
    if event_type in (EventType.metrics_plot, EventType.all):
        self.uncompress_plots(events)
    return TaskEventsResult(
        events=events, next_scroll_id=next_scroll_id, total_events=total_events
    )
def get_task_events(
    self,
    company_id: str,
    task_id: str,
    event_type: EventType,
    metric=None,
    variant=None,
    last_iter_count=None,
    sort=None,
    size=500,
    scroll_id=None,
):
    """
    Return a batch of task events, optionally filtered by metric/variant
    and restricted to the last `last_iter_count` iterations per task.
    Pass the returned scroll id to fetch the following batches.
    """
    if scroll_id == self.empty_scroll:
        # Bug fix: this exit used to return a bare tuple ([], scroll_id, 0)
        # while every other exit point returns a TaskEventsResult, breaking
        # callers that access the result's attributes
        return TaskEventsResult()

    if scroll_id:
        with translate_errors_context(), TimingContext("es", "get_task_events"):
            es_res = self.es.scroll(scroll_id=scroll_id, scroll="1h")
    else:
        task_ids = [task_id] if isinstance(task_id, six.string_types) else task_id
        if check_empty_data(self.es, company_id=company_id, event_type=event_type):
            return TaskEventsResult()

        must = []
        if metric:
            must.append({"term": {"metric": metric}})
        if variant:
            must.append({"term": {"variant": variant}})

        if last_iter_count is None:
            must.append({"terms": {"task": task_ids}})
        else:
            # Restrict each task to its own last iterations
            # (loop variable renamed so it no longer shadows the task_id parameter)
            should = []
            for tid in task_ids:
                last_iters = self.get_last_iters(
                    company_id=company_id,
                    event_type=event_type,
                    task_id=tid,
                    iters=last_iter_count,
                )
                if not last_iters:
                    continue
                should.append(
                    {
                        "bool": {
                            "must": [
                                {"term": {"task": tid}},
                                {"terms": {"iter": last_iters}},
                            ]
                        }
                    }
                )
            if not should:
                return TaskEventsResult()
            must.append({"bool": {"should": should}})

        if sort is None:
            sort = [{"timestamp": {"order": "asc"}}]

        es_req = {
            "sort": sort,
            "size": min(size, 10000),
            "query": {"bool": {"must": must}},
        }
        with translate_errors_context(), TimingContext("es", "get_task_events"):
            es_res = search_company_events(
                self.es,
                company_id=company_id,
                event_type=event_type,
                body=es_req,
                ignore=404,
                scroll="1h",
            )

    events, total_events, next_scroll_id = self._get_events_from_es_res(es_res)
    return TaskEventsResult(
        events=events, next_scroll_id=next_scroll_id, total_events=total_events
    )
def get_task_plots(
    self,
    company_id: str,
    tasks: Sequence[str],
    last_iterations_per_plot: int = None,
    sort=None,
    size: int = 500,
    scroll_id: str = None,
    no_scroll: bool = False,
    metric_variants: MetricVariants = None,
):
    """
    Return a batch of plot events for the given tasks, optionally
    restricted to the last iterations per plot and to specific
    metric/variant pairs. Pass the returned scroll id to fetch the
    following batches.
    """
    if scroll_id == self.empty_scroll:
        return TaskEventsResult()

    if scroll_id:
        # Continue a previously started scroll
        with translate_errors_context(), TimingContext("es", "get_task_events"):
            es_res = self.es.scroll(scroll_id=scroll_id, scroll="1h")
    else:
        event_type = EventType.metrics_plot
        if check_empty_data(self.es, company_id=company_id, event_type=event_type):
            return TaskEventsResult()

        # Accept plots explicitly marked valid, and legacy plots
        # created before the valid_plot flag existed
        plot_valid_condition = {
            "bool": {
                "should": [
                    {"term": {PlotFields.valid_plot: True}},
                    {
                        "bool": {
                            "must_not": {"exists": {"field": PlotFields.valid_plot}}
                        }
                    },
                ]
            }
        }
        must = [plot_valid_condition]

        if last_iterations_per_plot is None:
            must.append({"terms": {"task": tasks}})
            if metric_variants:
                must.append(get_metric_variants_condition(metric_variants))
        else:
            # Restrict every metric/variant of every task to its own
            # last reported iterations
            should = []
            for i, task_id in enumerate(tasks):
                last_iters = self.get_last_iterations_per_event_metric_variant(
                    company_id=company_id,
                    task_id=task_id,
                    num_last_iterations=last_iterations_per_plot,
                    event_type=event_type,
                    metric_variants=metric_variants,
                )
                if not last_iters:
                    continue

                for metric, variant, iter in last_iters:
                    should.append(
                        {
                            "bool": {
                                "must": [
                                    {"term": {"task": task_id}},
                                    {"term": {"metric": metric}},
                                    {"term": {"variant": variant}},
                                    {"term": {"iter": iter}},
                                ]
                            }
                        }
                    )
            if not should:
                return TaskEventsResult()
            must.append({"bool": {"should": should}})

        if sort is None:
            sort = [{"timestamp": {"order": "asc"}}]

        es_req = {
            "sort": sort,
            "size": min(size, 10000),
            "query": {"bool": {"must": must}},
        }

        with translate_errors_context(), TimingContext("es", "get_task_plots"):
            es_res = search_company_events(
                self.es,
                company_id=company_id,
                event_type=event_type,
                body=es_req,
                ignore=404,
                **({} if no_scroll else {"scroll": "1h"}),
            )

    events, total_events, next_scroll_id = self._get_events_from_es_res(es_res)
    # Plots are stored compressed; inflate them in place before returning
    self.uncompress_plots(events)
    return TaskEventsResult(
        events=events, next_scroll_id=next_scroll_id, total_events=total_events
    )
def _get_events(
    self,
    company_id: str,
    task_id: str,
    batch_size: int,
    navigate_earlier: bool,
    from_timestamp: Optional[int],
) -> Tuple[Sequence[dict], int]:
    """
    Return up to 'batch size' events starting from the previous timestamp
    either in the direction of earlier events (navigate_earlier=True) or
    in the direction of later events.
    If last_min_timestamp and last_max_timestamp are not set then start
    either from latest or earliest.
    For the last timestamp all the events are brought (even if the
    resulting size exceeds batch_size) so that this timestamp events will
    not be lost between the calls.
    Returns (events, total hit count).
    """
    # retrieve the next batch of events
    es_req = {
        "size": batch_size,
        "query": {"term": {"task": task_id}},
        "sort": {"timestamp": "desc" if navigate_earlier else "asc"},
    }
    # NOTE(review): a falsy but valid from_timestamp (0) would skip
    # search_after here — confirm callers never pass 0
    if from_timestamp:
        es_req["search_after"] = [from_timestamp]

    with translate_errors_context(), TimingContext("es", "get_task_events"):
        es_result = search_company_events(
            self.es,
            company_id=company_id,
            event_type=self.EVENT_TYPE,
            body=es_req,
        )
        hits = es_result["hits"]["hits"]
        hits_total = es_result["hits"]["total"]["value"]
        if not hits:
            return [], hits_total

        events = [hit["_source"] for hit in hits]

        # retrieve the events that match the last event timestamp
        # but did not make it into the previous call due to batch_size limitation
        es_req = {
            "size": 10000,
            "query": {
                "bool": {
                    "must": [
                        {"term": {"task": task_id}},
                        {"term": {"timestamp": events[-1]["timestamp"]}},
                    ]
                }
            },
        }
        es_result = search_company_events(
            self.es,
            company_id=company_id,
            event_type=self.EVENT_TYPE,
            body=es_req,
        )
        last_second_hits = es_result["hits"]["hits"]
        if not last_second_hits or len(last_second_hits) < 2:
            # if only one element is returned for the last timestamp
            # then it is already present in the events
            return events, hits_total

        # Deduplicate against the hits from the first query
        already_present_ids = set(hit["_id"] for hit in hits)
        last_second_events = [
            hit["_source"]
            for hit in last_second_hits
            if hit["_id"] not in already_present_ids
        ]

    # return the list merged from original query results +
    # leftovers from the last timestamp
    return (
        [*events, *last_second_events],
        hits_total,
    )