Example #1
    def get_vector_metrics_per_iter(self, company_id, task_id, metric, variant):
        event_type = EventType.metrics_vector
        if check_empty_data(self.es, company_id=company_id, event_type=event_type):
            return [], []

        es_req = {
            "size": 10000,
            "query": {
                "bool": {
                    "must": [
                        {"term": {"task": task_id}},
                        {"term": {"metric": metric}},
                        {"term": {"variant": variant}},
                    ]
                }
            },
            "_source": ["iter", "value"],
            "sort": ["iter"],
        }
        with translate_errors_context(), TimingContext("es", "task_stats_vector"):
            es_res = search_company_events(
                self.es, company_id=company_id, event_type=event_type, body=es_req
            )

        vectors = []
        iterations = []
        for hit in es_res["hits"]["hits"]:
            vectors.append(hit["_source"]["value"])
            iterations.append(hit["_source"]["iter"])

        return iterations, vectors
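
A minimal usage sketch, assuming `event_bll` is an instance of the class these methods belong to (the name, ids, and metric names below are placeholders, not part of the snippet):

# Hypothetical call: fetch a vector metric's history for one task.
iterations, vectors = event_bll.get_vector_metrics_per_iter(
    company_id="<company-id>",
    task_id="<task-id>",
    metric="confusion",
    variant="matrix",
)
for iteration, vector in zip(iterations, vectors):
    print(iteration, vector)
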
Example #2
    def get_last_iters(
        self, company_id: str, event_type: EventType, task_id: str, iters: int
    ):
        if check_empty_data(self.es, company_id=company_id, event_type=event_type):
            return []

        es_req: dict = {
            "size": 0,
            "aggs": {
                "iters": {
                    "terms": {
                        "field": "iter",
                        "size": iters,
                        "order": {"_key": "desc"},
                    }
                }
            },
            "query": {"bool": {"must": [{"term": {"task": task_id}}]}},
        }

        with translate_errors_context(), TimingContext("es", "task_last_iter"):
            es_res = search_company_events(
                self.es, company_id=company_id, event_type=event_type, body=es_req
            )

        if "aggregations" not in es_res:
            return []

        return [b["key"] for b in es_res["aggregations"]["iters"]["buckets"]]
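
The `terms` aggregation on `iter`, ordered by `_key` descending, returns the task's most recent iteration numbers. A hedged usage sketch (placeholder names):

# Hypothetical call: up to 3 latest iteration numbers, e.g. [1500, 1499, 1498].
last_iters = event_bll.get_last_iters(
    company_id="<company-id>",
    event_type=EventType.metrics_scalar,
    task_id="<task-id>",
    iters=3,
)
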
Example #3
    def get_plot_image_urls(
        self, company_id: str, task_id: str, scroll_id: Optional[str]
    ) -> Tuple[Sequence[dict], Optional[str]]:
        if scroll_id == self.empty_scroll:
            return [], None

        if scroll_id:
            es_res = self.es.scroll(scroll_id=scroll_id, scroll="10m")
        else:
            if check_empty_data(self.es, company_id, EventType.metrics_plot):
                return [], None

            es_req = {
                "size": 1000,
                "_source": [PlotFields.source_urls],
                "query": {
                    "bool": {
                        "must": [
                            {"term": {"task": task_id}},
                            {"exists": {"field": PlotFields.source_urls}},
                        ]
                    }
                },
            }
            es_res = search_company_events(
                self.es,
                company_id=company_id,
                event_type=EventType.metrics_plot,
                body=es_req,
                scroll="10m",
            )

        events, _, next_scroll_id = self._get_events_from_es_res(es_res)
        return events, next_scroll_id
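
Since the method returns a scroll id, callers can page through all plot events that carry source urls. A sketch of that loop, under the same placeholder assumptions:

# Hypothetical pagination loop: pass the returned scroll id back in
# until no more events are returned.
scroll_id = None
while True:
    events, scroll_id = event_bll.get_plot_image_urls(
        company_id="<company-id>", task_id="<task-id>", scroll_id=scroll_id
    )
    if not events:
        break
    for event in events:
        print(event.get(PlotFields.source_urls))
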
Example #4
    def get_last_iterations_per_event_metric_variant(
        self,
        company_id: str,
        task_id: str,
        num_last_iterations: int,
        event_type: EventType,
    ):
        if check_empty_data(self.es, company_id=company_id, event_type=event_type):
            return []

        es_req: dict = {
            "size": 0,
            "aggs": {
                "metrics": {
                    "terms": {
                        "field": "metric",
                        "size": EventSettings.max_metrics_count,
                        "order": {"_key": "asc"},
                    },
                    "aggs": {
                        "variants": {
                            "terms": {
                                "field": "variant",
                                "size": EventSettings.max_variants_count,
                                "order": {"_key": "asc"},
                            },
                            "aggs": {
                                "iters": {
                                    "terms": {
                                        "field": "iter",
                                        "size": num_last_iterations,
                                        "order": {"_key": "desc"},
                                    }
                                }
                            },
                        }
                    },
                }
            },
            "query": {"bool": {"must": [{"term": {"task": task_id}}]}},
        }

        with translate_errors_context(), TimingContext(
            "es", "task_last_iter_metric_variant"
        ):
            es_res = search_company_events(
                self.es, company_id=company_id, event_type=event_type, body=es_req
            )

        if "aggregations" not in es_res:
            return []

        return [
            (metric["key"], variant["key"], iter["key"])
            for metric in es_res["aggregations"]["metrics"]["buckets"]
            for variant in metric["variants"]["buckets"]
            for iter in variant["iters"]["buckets"]
        ]
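
A hedged usage sketch (placeholder ids); the nested metrics/variants/iters aggregations flatten into (metric, variant, iteration) tuples:

# Hypothetical call: e.g. [("loss", "total", 99), ("loss", "total", 98), ...]
triplets = event_bll.get_last_iterations_per_event_metric_variant(
    company_id="<company-id>",
    task_id="<task-id>",
    num_last_iterations=2,
    event_type=EventType.metrics_plot,
)
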
Example #5
    def get_metrics_and_variants(self, company_id: str, task_id: str,
                                 event_type: EventType):
        if check_empty_data(self.es,
                            company_id=company_id,
                            event_type=event_type):
            return {}

        es_req = {
            "size": 0,
            "aggs": {
                "metrics": {
                    "terms": {
                        "field": "metric",
                        "size": EventSettings.max_metrics_count,
                        "order": {"_key": "asc"},
                    },
                    "aggs": {
                        "variants": {
                            "terms": {
                                "field": "variant",
                                "size": EventSettings.max_variants_count,
                                "order": {"_key": "asc"},
                            }
                        }
                    },
                }
            },
            "query": {"bool": {"must": [{"term": {"task": task_id}}]}},
        }

        with translate_errors_context(), TimingContext(
                "es", "events_get_metrics_and_variants"):
            es_res = search_company_events(self.es,
                                           company_id=company_id,
                                           event_type=event_type,
                                           body=es_req)

        metrics = {}
        for metric_bucket in es_res["aggregations"]["metrics"].get("buckets"):
            metric = metric_bucket["key"]
            metrics[metric] = [
                b["key"] for b in metric_bucket["variants"].get("buckets")
            ]

        return metrics
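
A minimal usage sketch with placeholder ids:

# Hypothetical call: returns a mapping such as
# {"accuracy": ["top1", "top5"], "loss": ["total"]}
metrics = event_bll.get_metrics_and_variants(
    company_id="<company-id>",
    task_id="<task-id>",
    event_type=EventType.metrics_scalar,
)
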
Example #6
    def scroll_task_events(
        self,
        company_id: str,
        task_id: str,
        order: str,
        event_type: EventType,
        batch_size=10000,
        scroll_id=None,
    ):
        if scroll_id == self.empty_scroll:
            return [], scroll_id, 0

        if scroll_id:
            with translate_errors_context(), TimingContext(
                    "es", "task_log_events"):
                es_res = self.es.scroll(scroll_id=scroll_id, scroll="1h")
        else:
            size = min(batch_size, 10000)
            if check_empty_data(self.es,
                                company_id=company_id,
                                event_type=event_type):
                return [], None, 0

            es_req = {
                "size": size,
                "sort": {"timestamp": {"order": order}},
                "query": {"bool": {"must": [{"term": {"task": task_id}}]}},
            }

            with translate_errors_context(), TimingContext(
                    "es", "scroll_task_events"):
                es_res = search_company_events(
                    self.es,
                    company_id=company_id,
                    event_type=event_type,
                    body=es_req,
                    scroll="1h",
                )

        events, total_events, next_scroll_id = self._get_events_from_es_res(es_res)
        return events, next_scroll_id, total_events
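
A sketch of the first call, assuming the same placeholder wiring; subsequent calls pass the returned scroll id back in:

# Hypothetical call: oldest-first batch of a task's events plus a scroll id.
events, scroll_id, total = event_bll.scroll_task_events(
    company_id="<company-id>",
    task_id="<task-id>",
    order="asc",
    event_type=EventType.metrics_scalar,
    batch_size=1000,
)
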
Example #7
    def _query_aggregation_for_task_metrics(
        self,
        company_id: str,
        event_type: EventType,
        aggs: dict,
        task_id: str,
        metrics: Sequence[Tuple[str, str]],
    ) -> dict:
        """
        Return the result of the Elasticsearch query for the given aggregation,
        filtered by the given task id and metrics
        """
        must = [{"term": {"task": task_id}}]
        if metrics:
            should = [
                {
                    "bool": {
                        "must": [
                            {"term": {"metric": metric}},
                            {"term": {"variant": variant}},
                        ]
                    }
                }
                for metric, variant in metrics
            ]
            must.append({"bool": {"should": should}})

        es_req = {
            "size": 0,
            "query": {"bool": {"must": must}},
            "aggs": aggs,
        }

        with translate_errors_context(), TimingContext("es",
                                                       "task_stats_scalar"):
            es_res = search_company_events(
                self.es,
                company_id=company_id,
                event_type=event_type,
                body=es_req,
            )

        return es_res.get("aggregations")
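
Called from inside the same class; for illustration, a caller might wrap an aggregation like this (a hedged sketch, not taken from the source):

# Hypothetical aggregation: bucket events per iteration for one metric/variant pair.
aggs = {"iters": {"terms": {"field": "iter", "size": 100}}}
result = self._query_aggregation_for_task_metrics(
    company_id="<company-id>",
    event_type=EventType.metrics_scalar,
    aggs=aggs,
    task_id="<task-id>",
    metrics=[("loss", "total")],
)
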
Example #8
    def get_last_iters(
        self,
        company_id: str,
        event_type: EventType,
        task_id: Union[str, Sequence[str]],
        iters: int,
    ) -> Mapping[str, Sequence]:
        if check_empty_data(self.es, company_id=company_id, event_type=event_type):
            return {}

        task_ids = [task_id] if isinstance(task_id, str) else task_id
        es_req: dict = {
            "size": 0,
            "aggs": {
                "tasks": {
                    "terms": {"field": "task"},
                    "aggs": {
                        "iters": {
                            "terms": {
                                "field": "iter",
                                "size": iters,
                                "order": {"_key": "desc"},
                            }
                        }
                    },
                }
            },
            "query": {"bool": {"must": [{"terms": {"task": task_ids}}]}},
        }

        with translate_errors_context(), TimingContext("es", "task_last_iter"):
            es_res = search_company_events(
                self.es, company_id=company_id, event_type=event_type, body=es_req,
            )

        if "aggregations" not in es_res:
            return {}

        return {
            tb["key"]: [ib["key"] for ib in tb["iters"]["buckets"]]
            for tb in es_res["aggregations"]["tasks"]["buckets"]
        }
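
A hedged usage sketch with several tasks (placeholder ids):

# Hypothetical call: returns a mapping like
# {"<task-1>": [120, 119], "<task-2>": [57, 56]}
tasks_iters = event_bll.get_last_iters(
    company_id="<company-id>",
    event_type=EventType.metrics_scalar,
    task_id=["<task-1>", "<task-2>"],
    iters=2,
)

Note that the `tasks` terms aggregation above relies on Elasticsearch's default bucket size of 10, so very large task lists may be truncated.
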
Example #9
    def _get_task_metrics(self, task_id: str, company_id: str,
                          event_type: EventType) -> Sequence:
        es_req = {
            "size": 0,
            "query": {"bool": {"must": [{"term": {"task": task_id}}]}},
            "aggs": {
                "metrics": {
                    "terms": {
                        "field": "metric",
                        "size": EventSettings.max_metrics_count,
                        "order": {"_key": "asc"},
                    }
                }
            },
        }

        with translate_errors_context(), TimingContext("es",
                                                       "_get_task_metrics"):
            es_res = search_company_events(self.es,
                                           company_id=company_id,
                                           event_type=event_type,
                                           body=es_req)

        return [
            metric["key"]
            for metric in safe_get(es_res, "aggregations/metrics/buckets", default=[])
        ]
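
A hedged in-class usage sketch (placeholder ids):

# Hypothetical call: the metric names the task reported, in ascending key order.
metric_names = self._get_task_metrics(
    task_id="<task-id>",
    company_id="<company-id>",
    event_type=EventType.metrics_scalar,
)
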
Example #10
    def _get_task_metric_events(
        self,
        metric: MetricScrollState,
        company_id: str,
        iter_count: int,
        navigate_earlier: bool,
    ) -> Tuple:
        """
        Return task metric events grouped by iterations
        Update metric scroll state
        """
        if metric.last_max_iter is None:
            # the first fetch is always from the latest iteration to the earlier ones
            navigate_earlier = True

        must_conditions = [
            {"term": {"task": metric.task}},
            {"term": {"metric": metric.name}},
            {"exists": {"field": "url"}},
        ]
        must_not_conditions = []

        range_condition = None
        if navigate_earlier and metric.last_min_iter is not None:
            range_condition = {"lt": metric.last_min_iter}
        elif not navigate_earlier and metric.last_max_iter is not None:
            range_condition = {"gt": metric.last_max_iter}
        if range_condition:
            must_conditions.append({"range": {"iter": range_condition}})

        if navigate_earlier:
            # When navigating to earlier iterations consider only variants
            # whose invalid iterations border is lower than our starting
            # iteration. For these variants make sure that only events
            # from the valid iterations are returned.
            if not metric.last_min_iter:
                variants = metric.variants
            else:
                variants = [
                    v
                    for v in metric.variants
                    if v.last_invalid_iteration is None
                    or v.last_invalid_iteration < metric.last_min_iter
                ]
                if not variants:
                    return metric.task, metric.name, []
                must_conditions.append(
                    {"terms": {"variant": [v.name for v in variants]}}
                )
        else:
            # When navigating to later iterations all variants may be relevant.
            # For the variants whose invalid border is higher than our starting
            # iteration make sure that only events from valid iterations are
            # returned.
            variants = [
                v
                for v in metric.variants
                if v.last_invalid_iteration is not None
                and v.last_invalid_iteration > metric.last_max_iter
            ]

        variants_conditions = [
            {
                "bool": {
                    "must": [
                        {"term": {"variant": v.name}},
                        {"range": {"iter": {"lte": v.last_invalid_iteration}}},
                    ]
                }
            }
            for v in variants
            if v.last_invalid_iteration is not None
        ]
        if variants_conditions:
            must_not_conditions.append({"bool": {"should": variants_conditions}})

        es_req = {
            "size": 0,
            "query": {
                "bool": {"must": must_conditions, "must_not": must_not_conditions}
            },
            "aggs": {
                "iters": {
                    "terms": {
                        "field": "iter",
                        "size": iter_count,
                        "order": {"_key": "desc" if navigate_earlier else "asc"},
                    },
                    "aggs": {
                        "variants": {
                            "terms": {
                                "field": "variant",
                                "size": EventSettings.max_variants_count,
                                "order": {"_key": "asc"},
                            },
                            "aggs": {
                                "events": {
                                    "top_hits": {"sort": {"url": {"order": "desc"}}}
                                }
                            },
                        }
                    },
                }
            },
        }
        with translate_errors_context(), TimingContext(
                "es", "get_debug_image_events"):
            es_res = search_company_events(
                self.es,
                company_id=company_id,
                event_type=self.EVENT_TYPE,
                body=es_req,
            )
        if "aggregations" not in es_res:
            return metric.task, metric.name, []

        def get_iteration_events(variant_buckets: Sequence[dict]) -> Sequence:
            return [
                ev["_source"] for v in variant_buckets
                for ev in dpath.get(v, "events/hits/hits")
            ]

        iterations = [
            {
                "iter": it["key"],
                "events": get_iteration_events(dpath.get(it, "variants/buckets")),
            }
            for it in dpath.get(es_res, "aggregations/iters/buckets")
        ]
        if not navigate_earlier:
            iterations.sort(key=itemgetter("iter"), reverse=True)
        if iterations:
            metric.last_max_iter = iterations[0]["iter"]
            metric.last_min_iter = iterations[-1]["iter"]

        # Commented for now since the last invalid iteration is calculated in the beginning
        # if navigate_earlier and any(
        #     variant.last_invalid_iteration is None for variant in variants
        # ):
        #     """
        #     Variants validation flags due to recycling can
        #     be set only on navigation to earlier frames
        #     """
        #     iterations = self._update_variants_invalid_iterations(variants, iterations)

        return metric.task, metric.name, iterations
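
A sketch of the result shape, assuming `metric_state` is a MetricScrollState tracking last_min_iter/last_max_iter (placeholder wiring):

# Hypothetical in-class call: iterations come back as
# [{"iter": 42, "events": [{...}, ...]}, ...], newest iteration first.
task, name, iterations = self._get_task_metric_events(
    metric=metric_state,
    company_id="<company-id>",
    iter_count=10,
    navigate_earlier=True,
)
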
Example #11
    def _get_task_metric_intervals(
        self,
        company_id: str,
        event_type: EventType,
        task_id: str,
        samples: int,
        field: str = "iter",
    ) -> Sequence[MetricInterval]:
        """
        Calculate interval per task metric variant so that the resulting
        amount of points does not exceed sample.
        Return the list og metric variant intervals as the following tuple:
        (metric, variant, interval, samples)
        """
        es_req = {
            "size": 0,
            "query": {"term": {"task": task_id}},
            "aggs": {
                "metrics": {
                    "terms": {
                        "field": "metric",
                        "size": EventSettings.max_metrics_count,
                        "order": {"_key": "asc"},
                    },
                    "aggs": {
                        "variants": {
                            "terms": {
                                "field": "variant",
                                "size": EventSettings.max_variants_count,
                                "order": {"_key": "asc"},
                            },
                            "aggs": {
                                "count": {"value_count": {"field": field}},
                                "min_index": {"min": {"field": field}},
                                "max_index": {"max": {"field": field}},
                            },
                        }
                    },
                }
            },
        }

        with translate_errors_context(), TimingContext(
                "es", "task_stats_get_interval"):
            es_res = search_company_events(
                self.es,
                company_id=company_id,
                event_type=event_type,
                body=es_req,
            )

        aggs_result = es_res.get("aggregations")
        if not aggs_result:
            return []

        return [
            self._build_metric_interval(metric["key"], variant["key"], variant, samples)
            for metric in aggs_result["metrics"]["buckets"]
            for variant in metric["variants"]["buckets"]
        ]
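
The downstream _build_metric_interval helper is not shown here; presumably it derives the sampling interval from each variant's count and index range. A hedged sketch of that arithmetic:

import math

# Hypothetical interval computation: with `count` documents spanning
# [min_index, max_index], an interval of ceil(span / samples) keeps the
# number of sampled points at or below `samples`.
def build_interval(min_index: int, max_index: int, count: int, samples: int) -> int:
    if count <= samples:
        return 1
    return int(math.ceil((max_index - min_index + 1) / samples))
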
Example #12
    def _init_metric_states_for_task(self, task_metrics: Tuple[str, dict],
                                     company_id: str) -> Sequence[MetricState]:
        """
        Return metric scroll states for the task filled with the variant states
        for the variants that reported any debug images
        """
        task, metrics = task_metrics
        must = [{"term": {"task": task}}, {"exists": {"field": "url"}}]
        if metrics:
            must.append(get_metric_variants_condition(metrics))
        query = {"bool": {"must": must}}
        es_req: dict = {
            "size": 0,
            "query": query,
            "aggs": {
                "metrics": {
                    "terms": {
                        "field": "metric",
                        "size": EventSettings.max_metrics_count,
                        "order": {"_key": "asc"},
                    },
                    "aggs": {
                        "last_event_timestamp": {"max": {"field": "timestamp"}},
                        "variants": {
                            "terms": {
                                "field": "variant",
                                "size": EventSettings.max_variants_count,
                                "order": {"_key": "asc"},
                            },
                            "aggs": {
                                "urls": {
                                    "terms": {
                                        "field": "url",
                                        "order": {"max_iter": "desc"},
                                        # we need only one url from the most recent iteration
                                        "size": 1,
                                    },
                                    "aggs": {
                                        "max_iter": {"max": {"field": "iter"}},
                                        "iters": {
                                            "top_hits": {
                                                "sort": {"iter": {"order": "desc"}},
                                                # need two last iterations so that
                                                # we can take the second one as invalid
                                                "size": 2,
                                                "_source": "iter",
                                            }
                                        },
                                    },
                                }
                            },
                        },
                    },
                }
            },
        }

        with translate_errors_context(), TimingContext("es",
                                                       "_init_metric_states"):
            es_res = search_company_events(
                self.es,
                company_id=company_id,
                event_type=self.EVENT_TYPE,
                body=es_req,
            )
        if "aggregations" not in es_res:
            return []

        def init_variant_state(variant: dict):
            """
            Return new variant state for the passed variant bucket
            If the image urls get recycled then fill the last_invalid_iteration field
            """
            state = VariantState(variant=variant["key"])
            top_iter_url = dpath.get(variant, "urls/buckets")[0]
            iters = dpath.get(top_iter_url, "iters/hits/hits")
            if len(iters) > 1:
                state.last_invalid_iteration = dpath.get(
                    iters[1], "_source/iter")
            return state

        return [
            MetricState(
                metric=metric["key"],
                timestamp=dpath.get(metric, "last_event_timestamp/value"),
                variants=[
                    init_variant_state(variant)
                    for variant in dpath.get(metric, "variants/buckets")
                ],
            ) for metric in dpath.get(es_res, "aggregations/metrics/buckets")
        ]
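
A hedged in-class usage sketch (placeholder ids; an empty metrics dict means no metric/variant filtering):

# Hypothetical call: one MetricState per metric that reported debug image urls,
# each with VariantState entries (last_invalid_iteration set when urls recycle).
states = self._init_metric_states_for_task(
    task_metrics=("<task-id>", {}),
    company_id="<company-id>",
)
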
Example #13
    def _get_events(
        self,
        event_type: EventType,
        company_id: str,
        task_id: str,
        batch_size: int,
        navigate_earlier: bool,
        key: ScalarKey,
        from_key_value: Optional[Any],
        metric_variants: MetricVariants = None,
    ) -> Tuple[Sequence[dict], int]:
        """
        Return up to 'batch size' events starting from the previous key-field value (timestamp or iter) either in the
        direction of earlier events (navigate_earlier=True) or in the direction of later events.
        If from_key_value is not set then start either from latest or earliest.
        For the last key-field value all the events are brought (even if the resulting size exceeds batch_size)
        so that events with this value will not be lost between the calls.
        """
        query, must = self._get_initial_query_and_must(task_id,
                                                       metric_variants)

        # retrieve the next batch of events
        es_req = {
            "size": batch_size,
            "query": query,
            "sort": {key.field: "desc" if navigate_earlier else "asc"},
        }

        if from_key_value:
            es_req["search_after"] = [from_key_value]

        with translate_errors_context(), TimingContext("es",
                                                       "get_task_events"):
            es_result = search_company_events(
                self.es,
                company_id=company_id,
                event_type=event_type,
                body=es_req,
                routing=task_id,
            )
            hits = es_result["hits"]["hits"]
            hits_total = es_result["hits"]["total"]["value"]
            if not hits:
                return [], hits_total

            events = [hit["_source"] for hit in hits]

            # retrieve the events that match the last event timestamp
            # but did not make it into the previous call due to batch_size limitation
            es_req = {
                "size": 10000,
                "query": {
                    "bool": {
                        "must": must + [{"term": {key.field: events[-1][key.field]}}]
                    }
                },
            }
            es_result = search_company_events(
                self.es,
                company_id=company_id,
                event_type=event_type,
                body=es_req,
                routing=task_id,
            )
            last_second_hits = es_result["hits"]["hits"]
            if not last_second_hits or len(last_second_hits) < 2:
                # if only one element is returned for the last timestamp
                # then it is already present in the events
                return events, hits_total

            already_present_ids = set(hit["_id"] for hit in hits)
            last_second_events = [
                hit["_source"] for hit in last_second_hits
                if hit["_id"] not in already_present_ids
            ]

            # return the list merged from original query results +
            # leftovers from the last timestamp
            return (
                [*events, *last_second_events],
                hits_total,
            )
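
A hedged in-class usage sketch; `key` is assumed to be a ScalarKey whose `field` is e.g. "iter" or "timestamp":

# Hypothetical call: the next batch of events later than the last seen key value.
events, total = self._get_events(
    event_type=EventType.metrics_scalar,
    company_id="<company-id>",
    task_id="<task-id>",
    batch_size=1000,
    navigate_earlier=False,
    key=key,
    from_key_value=last_key_value,  # None on the first call
)
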
Example #14
    def _get_task_metric_events(
        self,
        task_state: TaskScrollState,
        company_id: str,
        iter_count: int,
        navigate_earlier: bool,
    ) -> Tuple:
        """
        Return task metric events grouped by iterations
        Update task scroll state
        """
        if not task_state.metrics:
            return task_state.task, []

        if task_state.last_max_iter is None:
            # the first fetch is always from the latest iteration to the earlier ones
            navigate_earlier = True

        must_conditions = [
            {"term": {"task": task_state.task}},
            {"terms": {"metric": [m.metric for m in task_state.metrics]}},
            {"exists": {"field": "url"}},
        ]

        range_condition = None
        if navigate_earlier and task_state.last_min_iter is not None:
            range_condition = {"lt": task_state.last_min_iter}
        elif not navigate_earlier and task_state.last_max_iter is not None:
            range_condition = {"gt": task_state.last_max_iter}
        if range_condition:
            must_conditions.append({"range": {"iter": range_condition}})

        es_req = {
            "size": 0,
            "query": {"bool": {"must": must_conditions}},
            "aggs": {
                "iters": {
                    "terms": {
                        "field": "iter",
                        "size": iter_count,
                        "order": {"_key": "desc" if navigate_earlier else "asc"},
                    },
                    "aggs": {
                        "metrics": {
                            "terms": {
                                "field": "metric",
                                "size": EventSettings.max_metrics_count,
                                "order": {"_key": "asc"},
                            },
                            "aggs": {
                                "variants": {
                                    "terms": {
                                        "field": "variant",
                                        "size": EventSettings.max_variants_count,
                                        "order": {"_key": "asc"},
                                    },
                                    "aggs": {
                                        "events": {
                                            "top_hits": {
                                                "sort": {"url": {"order": "desc"}}
                                            }
                                        }
                                    },
                                }
                            },
                        }
                    },
                }
            },
        }
        with translate_errors_context(), TimingContext(
                "es", "get_debug_image_events"):
            es_res = search_company_events(
                self.es,
                company_id=company_id,
                event_type=self.EVENT_TYPE,
                body=es_req,
            )
        if "aggregations" not in es_res:
            return task_state.task, []

        invalid_iterations = {
            (m.metric, v.variant): v.last_invalid_iteration
            for m in task_state.metrics
            for v in m.variants
        }

        def is_valid_event(event: dict) -> bool:
            key = event.get("metric"), event.get("variant")
            if key not in invalid_iterations:
                return False

            max_invalid = invalid_iterations[key]
            return max_invalid is None or event.get("iter") > max_invalid

        def get_iteration_events(it_: dict) -> Sequence:
            return [
                ev["_source"]
                for m in dpath.get(it_, "metrics/buckets")
                for v in dpath.get(m, "variants/buckets")
                for ev in dpath.get(v, "events/hits/hits")
                if is_valid_event(ev["_source"])
            ]

        iterations = []
        for it in dpath.get(es_res, "aggregations/iters/buckets"):
            events = get_iteration_events(it)
            if events:
                iterations.append({"iter": it["key"], "events": events})

        if not navigate_earlier:
            iterations.sort(key=itemgetter("iter"), reverse=True)
        if iterations:
            task_state.last_max_iter = iterations[0]["iter"]
            task_state.last_min_iter = iterations[-1]["iter"]

        return task_state.task, iterations
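
A hedged in-class usage sketch, assuming `task_scroll_state` is a TaskScrollState carrying the per-metric variant states:

# Hypothetical call: iterations grouped newest-first, with events from
# recycled (invalid) iterations filtered out per metric/variant.
task, iterations = self._get_task_metric_events(
    task_state=task_scroll_state,
    company_id="<company-id>",
    iter_count=10,
    navigate_earlier=True,
)
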
Example #15
    def get_task_latest_scalar_values(
        self, company_id, task_id
    ) -> Tuple[Sequence[dict], int]:
        event_type = EventType.metrics_scalar
        if check_empty_data(self.es, company_id=company_id, event_type=event_type):
            return [], 0

        query = {
            "bool": {
                "must": [
                    {"query_string": {"query": "value:>0"}},
                    {"term": {"task": task_id}},
                ]
            }
        }
        es_req = {
            "size": 0,
            "query": query,
            "aggs": {
                "metrics": {
                    "terms": {
                        "field": "metric",
                        "size": EventSettings.max_metrics_count,
                        "order": {"_key": "asc"},
                    },
                    "aggs": {
                        "variants": {
                            "terms": {
                                "field": "variant",
                                "size": EventSettings.max_variants_count,
                                "order": {"_key": "asc"},
                            },
                            "aggs": {
                                "last_value": {
                                    "top_hits": {
                                        "docvalue_fields": ["value"],
                                        "_source": "value",
                                        "size": 1,
                                        "sort": [{"iter": {"order": "desc"}}],
                                    }
                                },
                                "last_timestamp": {"max": {"field": "@timestamp"}},
                                "last_10_value": {
                                    "top_hits": {
                                        "docvalue_fields": ["value"],
                                        "_source": "value",
                                        "size": 10,
                                        "sort": [{"iter": {"order": "desc"}}],
                                    }
                                },
                            },
                        }
                    },
                }
            },
            "_source": {"excludes": []},
        }
        with translate_errors_context(), TimingContext(
            "es", "events_get_metrics_and_variants"
        ):
            es_res = search_company_events(
                self.es, company_id=company_id, event_type=event_type, body=es_req
            )

        metrics = []
        max_timestamp = 0
        for metric_bucket in es_res["aggregations"]["metrics"].get("buckets"):
            metric_summary = dict(name=metric_bucket["key"], variants=[])
            for variant_bucket in metric_bucket["variants"].get("buckets"):
                variant_name = variant_bucket["key"]
                last_value = variant_bucket["last_value"]["hits"]["hits"][0]["fields"][
                    "value"
                ][0]
                last_10_value = variant_bucket["last_10_value"]["hits"]["hits"][0][
                    "fields"
                ]["value"][0]
                timestamp = variant_bucket["last_timestamp"]["value"]
                max_timestamp = max(timestamp, max_timestamp)
                metric_summary["variants"].append(
                    dict(
                        name=variant_name,
                        last_value=last_value,
                        last_10_value=last_10_value,
                    )
                )
            metrics.append(metric_summary)
        return metrics, max_timestamp
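
A minimal usage sketch (placeholder ids):

# Hypothetical call: per-metric summaries plus the newest event timestamp, e.g.
# ([{"name": "loss", "variants": [{"name": "total", "last_value": 0.03, ...}]}], 1620000000000)
metrics, max_timestamp = event_bll.get_task_latest_scalar_values(
    company_id="<company-id>", task_id="<task-id>"
)
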
Example #16
    def get_task_events(
        self,
        company_id: str,
        task_id: str,
        event_type: EventType,
        metric=None,
        variant=None,
        last_iter_count=None,
        sort=None,
        size=500,
        scroll_id=None,
        no_scroll=False,
    ) -> TaskEventsResult:
        if scroll_id == self.empty_scroll:
            return TaskEventsResult()

        if scroll_id:
            with translate_errors_context(), TimingContext("es", "get_task_events"):
                es_res = self.es.scroll(scroll_id=scroll_id, scroll="1h")
        else:
            if check_empty_data(self.es, company_id=company_id, event_type=event_type):
                return TaskEventsResult()

            task_ids = [task_id] if isinstance(task_id, str) else task_id

            must = []
            if metric:
                must.append({"term": {"metric": metric}})
            if variant:
                must.append({"term": {"variant": variant}})

            if last_iter_count is None:
                must.append({"terms": {"task": task_ids}})
            else:
                tasks_iters = self.get_last_iters(
                    company_id=company_id,
                    event_type=event_type,
                    task_id=task_ids,
                    iters=last_iter_count,
                )
                should = [
                    {
                        "bool": {
                            "must": [
                                {"term": {"task": task}},
                                {"terms": {"iter": last_iters}},
                            ]
                        }
                    }
                    for task, last_iters in tasks_iters.items()
                    if last_iters
                ]
                if not should:
                    return TaskEventsResult()
                must.append({"bool": {"should": should}})

            if sort is None:
                sort = [{"timestamp": {"order": "asc"}}]

            es_req = {
                "sort": sort,
                "size": min(size, 10000),
                "query": {"bool": {"must": must}},
            }

            with translate_errors_context(), TimingContext("es", "get_task_events"):
                es_res = search_company_events(
                    self.es,
                    company_id=company_id,
                    event_type=event_type,
                    body=es_req,
                    ignore=404,
                    **({} if no_scroll else {"scroll": "1h"}),
                )

        events, total_events, next_scroll_id = self._get_events_from_es_res(es_res)
        if event_type in (EventType.metrics_plot, EventType.all):
            self.uncompress_plots(events)

        return TaskEventsResult(
            events=events, next_scroll_id=next_scroll_id, total_events=total_events
        )
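
A hedged usage sketch (placeholder ids):

# Hypothetical call: events from the last 5 iterations of one scalar metric,
# without opening an ES scroll.
result = event_bll.get_task_events(
    company_id="<company-id>",
    task_id="<task-id>",
    event_type=EventType.metrics_scalar,
    metric="loss",
    last_iter_count=5,
    no_scroll=True,
)
print(result.total_events, len(result.events))
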
Example #17
    def get_task_events(
        self,
        company_id: str,
        task_id: str,
        event_type: EventType,
        metric=None,
        variant=None,
        last_iter_count=None,
        sort=None,
        size=500,
        scroll_id=None,
    ):
        if scroll_id == self.empty_scroll:
            return TaskEventsResult()

        if scroll_id:
            with translate_errors_context(), TimingContext(
                    "es", "get_task_events"):
                es_res = self.es.scroll(scroll_id=scroll_id, scroll="1h")
        else:
            task_ids = [task_id] if isinstance(task_id,
                                               six.string_types) else task_id

            if check_empty_data(self.es,
                                company_id=company_id,
                                event_type=event_type):
                return TaskEventsResult()

            must = []
            if metric:
                must.append({"term": {"metric": metric}})
            if variant:
                must.append({"term": {"variant": variant}})

            if last_iter_count is None:
                must.append({"terms": {"task": task_ids}})
            else:
                should = []
                for task_id in task_ids:
                    last_iters = self.get_last_iters(
                        company_id=company_id,
                        event_type=event_type,
                        task_id=task_id,
                        iters=last_iter_count,
                    )
                    if not last_iters:
                        continue
                    should.append(
                        {
                            "bool": {
                                "must": [
                                    {"term": {"task": task_id}},
                                    {"terms": {"iter": last_iters}},
                                ]
                            }
                        }
                    )
                if not should:
                    return TaskEventsResult()
                must.append({"bool": {"should": should}})

            if sort is None:
                sort = [{"timestamp": {"order": "asc"}}]

            es_req = {
                "sort": sort,
                "size": min(size, 10000),
                "query": {"bool": {"must": must}},
            }

            with translate_errors_context(), TimingContext(
                    "es", "get_task_events"):
                es_res = search_company_events(
                    self.es,
                    company_id=company_id,
                    event_type=event_type,
                    body=es_req,
                    ignore=404,
                    scroll="1h",
                )

        events, total_events, next_scroll_id = self._get_events_from_es_res(es_res)
        return TaskEventsResult(
            events=events, next_scroll_id=next_scroll_id, total_events=total_events
        )
Example #18
    def get_task_plots(
        self,
        company_id: str,
        tasks: Sequence[str],
        last_iterations_per_plot: int = None,
        sort=None,
        size: int = 500,
        scroll_id: str = None,
        no_scroll: bool = False,
        metric_variants: MetricVariants = None,
    ):
        if scroll_id == self.empty_scroll:
            return TaskEventsResult()

        if scroll_id:
            with translate_errors_context(), TimingContext("es", "get_task_events"):
                es_res = self.es.scroll(scroll_id=scroll_id, scroll="1h")
        else:
            event_type = EventType.metrics_plot
            if check_empty_data(self.es, company_id=company_id, event_type=event_type):
                return TaskEventsResult()

            plot_valid_condition = {
                "bool": {
                    "should": [
                        {"term": {PlotFields.valid_plot: True}},
                        {
                            "bool": {
                                "must_not": {"exists": {"field": PlotFields.valid_plot}}
                            }
                        },
                    ]
                }
            }
            must = [plot_valid_condition]

            if last_iterations_per_plot is None:
                must.append({"terms": {"task": tasks}})
                if metric_variants:
                    must.append(get_metric_variants_condition(metric_variants))
            else:
                should = []
                for task_id in tasks:
                    last_iters = self.get_last_iterations_per_event_metric_variant(
                        company_id=company_id,
                        task_id=task_id,
                        num_last_iterations=last_iterations_per_plot,
                        event_type=event_type,
                        metric_variants=metric_variants,
                    )
                    if not last_iters:
                        continue

                    for metric, variant, iter in last_iters:
                        should.append(
                            {
                                "bool": {
                                    "must": [
                                        {"term": {"task": task_id}},
                                        {"term": {"metric": metric}},
                                        {"term": {"variant": variant}},
                                        {"term": {"iter": iter}},
                                    ]
                                }
                            }
                        )
                if not should:
                    return TaskEventsResult()
                must.append({"bool": {"should": should}})

            if sort is None:
                sort = [{"timestamp": {"order": "asc"}}]

            es_req = {
                "sort": sort,
                "size": min(size, 10000),
                "query": {"bool": {"must": must}},
            }

            with translate_errors_context(), TimingContext("es", "get_task_plots"):
                es_res = search_company_events(
                    self.es,
                    company_id=company_id,
                    event_type=event_type,
                    body=es_req,
                    ignore=404,
                    **({} if no_scroll else {"scroll": "1h"}),
                )

        events, total_events, next_scroll_id = self._get_events_from_es_res(es_res)
        self.uncompress_plots(events)
        return TaskEventsResult(
            events=events, next_scroll_id=next_scroll_id, total_events=total_events
        )
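
A hedged usage sketch (placeholder ids):

# Hypothetical call: the latest plot event per metric/variant for two tasks.
result = event_bll.get_task_plots(
    company_id="<company-id>",
    tasks=["<task-1>", "<task-2>"],
    last_iterations_per_plot=1,
    no_scroll=True,
)
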
Example #19
    def _get_events(
        self,
        company_id: str,
        task_id: str,
        batch_size: int,
        navigate_earlier: bool,
        from_timestamp: Optional[int],
    ) -> Tuple[Sequence[dict], int]:
        """
        Return up to 'batch size' events starting from the previous timestamp either in the
        direction of earlier events (navigate_earlier=True) or in the direction of later events.
        If from_timestamp is not set then start either from the latest or the earliest events.
        For the last timestamp all the events are brought (even if the resulting size
        exceeds batch_size) so that events from this timestamp will not be lost between the calls.
        In case any events were received, update 'last_min_timestamp' and 'last_max_timestamp'
        """

        # retrieve the next batch of events
        es_req = {
            "size": batch_size,
            "query": {"term": {"task": task_id}},
            "sort": {"timestamp": "desc" if navigate_earlier else "asc"},
        }

        if from_timestamp:
            es_req["search_after"] = [from_timestamp]

        with translate_errors_context(), TimingContext("es",
                                                       "get_task_events"):
            es_result = search_company_events(
                self.es,
                company_id=company_id,
                event_type=self.EVENT_TYPE,
                body=es_req,
            )
            hits = es_result["hits"]["hits"]
            hits_total = es_result["hits"]["total"]["value"]
            if not hits:
                return [], hits_total

            events = [hit["_source"] for hit in hits]

            # retrieve the events that match the last event timestamp
            # but did not make it into the previous call due to batch_size limitation
            es_req = {
                "size": 10000,
                "query": {
                    "bool": {
                        "must": [
                            {"term": {"task": task_id}},
                            {"term": {"timestamp": events[-1]["timestamp"]}},
                        ]
                    }
                },
            }
            es_result = search_company_events(
                self.es,
                company_id=company_id,
                event_type=self.EVENT_TYPE,
                body=es_req,
            )
            last_second_hits = es_result["hits"]["hits"]
            if not last_second_hits or len(last_second_hits) < 2:
                # if only one element is returned for the last timestamp
                # then it is already present in the events
                return events, hits_total

            already_present_ids = set(hit["_id"] for hit in hits)
            last_second_events = [
                hit["_source"] for hit in last_second_hits
                if hit["_id"] not in already_present_ids
            ]

            # return the list merged from original query results +
            # leftovers from the last timestamp
            return (
                [*events, *last_second_events],
                hits_total,
            )