def _calculate_metric_interval(metric_variant: dict, samples: int) -> int:
    """
    Calculate the index interval to use for a single metric variant.

    Returns 1 when the variant's "count/value" is missing, zero, or smaller
    than ``samples``. Otherwise the index span
    (``max_index - min_index + 1``) is integer-divided by ``samples``,
    and the result is never less than 1.
    """
    total = safe_get(metric_variant, "count/value")
    too_few_points = not total or total < samples
    if too_few_points:
        return 1

    first_index = safe_get(metric_variant, "min_index/value", default=0)
    # When no max index is reported, fall back to the min index (span of 1)
    last_index = safe_get(metric_variant, "max_index/value", default=first_index)
    index_span = int(last_index - first_index + 1)
    return max(1, index_span // samples)
def _get_metric_fields(metrics: Sequence[dict]) -> dict:
    """
    Convert metric aggregation buckets into a dict of min/max/avg values.

    Only buckets whose "key" is one of the known metrics are kept; each is
    stored under its output field name. Buckets with any other key are
    ignored.
    """
    field_by_metric = {
        "cpu_usage": "cpu_usage",
        "memory_used": "mem_used_gb",
        "memory_free": "mem_free_gb",
    }

    result = {}
    for bucket in metrics:
        metric_key = bucket["key"]
        if metric_key not in field_by_metric:
            continue
        result[field_by_metric[metric_key]] = {
            "min": safe_get(bucket, "min/value"),
            "max": safe_get(bucket, "max/value"),
            "avg": safe_get(bucket, "avg/value"),
        }
    return result
def _get_task_metrics(self, task_id, es_index, event_type: EventType) -> Sequence:
    """
    Return the keys of the "metric" terms-aggregation buckets for the events
    of the given task and event type.

    The search returns no hits (size 0), is routed by the task id and asks
    for at most ``self.MAX_METRICS_COUNT`` buckets. An empty list is
    returned when the response has no such aggregation.
    """
    event_filters = [
        {"term": {"task": task_id}},
        {"term": {"type": event_type.value}},
    ]
    metrics_agg = {"terms": {"field": "metric", "size": self.MAX_METRICS_COUNT}}
    es_req = {
        "size": 0,
        "query": {"bool": {"must": event_filters}},
        "aggs": {"metrics": metrics_agg},
    }

    with translate_errors_context(), TimingContext("es", "_get_task_metrics"):
        es_res = self.es.search(index=es_index, body=es_req, routing=task_id)

    buckets = safe_get(es_res, "aggregations/metrics/buckets", default=[])
    return [bucket["key"] for bucket in buckets]
def _get_active_workers(cls, company_id, from_timestamp: int, to_timestamp: int) -> dict:
    """
    Query the worker stats for the given timestamp range and return, per
    worker bucket key, the maximal "timestamp" value found for it.

    The result has the form ``{worker: {"last_activity_time": value}}`` and
    is empty when the response contains no worker buckets.
    """
    last_activity_agg = {"last_activity_time": {"max": {"field": "timestamp"}}}
    es_req = {
        "size": 0,
        "query": QueryBuilder.dates_range(from_timestamp, to_timestamp),
        "aggs": {
            "workers": {
                "terms": {"field": "worker"},
                "aggs": last_activity_agg,
            }
        },
    }
    res = cls._run_worker_stats_query(company_id, es_req)

    active_workers = {}
    for bucket in safe_get(res, "aggregations/workers/buckets", default=[]):
        active_workers[bucket["key"]] = {
            "last_activity_time": bucket["last_activity_time"]["value"]
        }
    return active_workers
def _get_cardinality_fields(categories: Sequence[dict]) -> dict:
    """
    Convert category aggregation buckets into a dict of cardinality counts.

    Only buckets whose "key" is a known category are kept; each one's
    "count/value" is stored under the matching output field name.
    """
    field_by_category = {"cpu": "num_cores"}

    result = {}
    for bucket in categories:
        category = bucket["key"]
        if category not in field_by_category:
            continue
        result[field_by_category[category]] = safe_get(bucket, "count/value")
    return result
def get_page(page_number):
    """Fetch one page of the list and return its HTML text.

    Returns None when ``safe_get`` yields no response.
    """
    url = "".join((
        "https://www.imdb.com/list/",
        list_id,
        "/?sort=list_order,asc&st_dt=&mode=detail&page=",
        str(page_number),
    ))
    response = safe_get(url)
    return None if response is None else response.text
def get_movie_synopsis(movie_id):
    """Return the synopsis found on a title's plot-summary page.

    Args:
        movie_id: Title identifier inserted into the plot-summary URL.

    Returns:
        The result of ``remove_spaces`` applied to the first ``<li>`` element
        whose id starts with ``synopsis-``, or None when the request yielded
        no response, the page contains the ``#no-synopsis-content`` marker,
        or no synopsis element is present.
    """
    response = safe_get('https://www.imdb.com/title/' + movie_id + '/plotsummary')
    if response is None:
        return None

    movie_page_html = BeautifulSoup(response.content, 'html.parser')
    if movie_page_html.select('#no-synopsis-content'):
        return None

    # find() yields None when nothing matches; indexing the findAll() result
    # with [0] raised IndexError on pages lacking a synopsis element.
    synopsis_html = movie_page_html.find(
        'li', id=lambda x: x and x.startswith('synopsis-'))
    if synopsis_html is None:
        return None
    return remove_spaces(synopsis_html)
def unprepare_from_saved(call: APICall, tasks_data: Union[Sequence[dict], dict]):
    """
    Adjust saved task data in place before it is returned to the caller.

    A single task dict is treated as a one-element list. Output tags are
    conformed for the call, and the keys of each task's
    "execution/parameters" (when present) are passed through
    ``ParameterKeyEscaper.unescape``.
    """
    if isinstance(tasks_data, dict):
        tasks_data = [tasks_data]

    conform_output_tags(call, tasks_data)

    for task_data in tasks_data:
        stored_params = safe_get(task_data, "execution/parameters")
        if stored_params is None:
            continue

        # Unescape the parameter keys (reverses ParameterKeyEscaper.escape)
        restored_params = {}
        for name, value in stored_params.items():
            restored_params[ParameterKeyEscaper.unescape(name)] = value
        dpath.set(task_data, "execution/parameters", restored_params)
def prepare_create_fields(
    call: APICall, valid_fields=None, output=None, previous_task: Task = None
):
    """
    Parse and normalize the task fields supplied in a create-style call.

    :param call: the API call whose ``data`` is parsed
    :param valid_fields: fields accepted from the call data; defaults to
        ``create_fields`` when None
    :param output: optional existing output object. When the call supplies
        "output_dest" it is stored in ``output.destination`` (a new
        ``Output`` is created if no output was passed)
    :param previous_task: not used by this function; kept for interface
        compatibility
    :return: the parsed fields dict, where:
        - "output_dest" (if given) is replaced by an "output" entry
        - "script/requirements" is removed
        - "tags" contains no duplicates (first-seen order is kept)
        - string script fields and "execution/parameters" keys are stripped
          of leading and trailing whitespace
    """
    valid_fields = valid_fields if valid_fields is not None else create_fields

    # Work on a copy: adding "output_dest" directly to the shared module-level
    # task_fields would leak it into every other user of that collection
    t_fields = set(task_fields)
    t_fields.add("output_dest")

    fields = parse_from_call(call.data, valid_fields, t_fields)

    # Move output_dest to output.destination
    output_dest = fields.get("output_dest")
    if output_dest is not None:
        fields.pop("output_dest")
        if output:
            output.destination = output_dest
        else:
            output = Output(destination=output_dest)
        fields["output"] = output

    try:
        dpath.delete(fields, "script/requirements")
    except dpath.exceptions.PathNotFound:
        # No requirements were supplied - nothing to remove
        pass

    # Make sure there are no duplicate tags. dict.fromkeys keeps the order in
    # which tags were first seen, so the result is deterministic
    tags = fields.get("tags")
    if tags:
        fields["tags"] = list(dict.fromkeys(tags))

    # Strip all script fields (remove leading and trailing whitespace chars)
    # to avoid unusable names and paths
    for field in task_script_fields:
        path = f"script/{field}"
        try:
            value = dpath.get(fields, path)
            if isinstance(value, str):
                dpath.set(fields, path, value.strip())
        except KeyError:
            # Field was not supplied in this call - nothing to strip
            pass

    parameters = safe_get(fields, "execution/parameters")
    if parameters is not None:
        parameters = {k.strip(): v for k, v in parameters.items()}
        dpath.set(fields, "execution/parameters", parameters)

    return fields
def prepare_for_save(call: APICall, fields: dict):
    """
    Normalize task fields in place before they are saved and return them.

    Tag fields are conformed for the call, string script fields are stripped
    of surrounding whitespace, and the keys of "execution/parameters" (when
    present) are escaped with ``ParameterKeyEscaper.escape``.
    """
    conform_tag_fields(call, fields)

    # Surrounding whitespace in script fields leads to unusable names and
    # paths, so trim every string value found under "script/"
    for script_field in task_script_fields:
        script_path = f"script/{script_field}"
        try:
            current = dpath.get(fields, script_path)
            cleaned = current.strip() if isinstance(current, str) else current
            dpath.set(fields, script_path, cleaned)
        except KeyError:
            continue

    raw_params = safe_get(fields, "execution/parameters")
    if raw_params is not None:
        # Parameter keys must be escaped to be mongo-safe
        escaped_params = {}
        for name, value in raw_params.items():
            escaped_params[ParameterKeyEscaper.escape(name)] = value
        dpath.set(fields, "execution/parameters", escaped_params)

    return fields
def _get_resource_stats_per_agent(cls, company_id: str, key: str) -> dict:
    """
    Collect per-worker resource statistics over the reporting interval.

    The interval length (in hours) is read from the
    "apiserver.statistics.report_interval_hours" setting, defaulting to 24,
    and the queried range ends at the current time.

    :param company_id: passed to the worker stats query
    :param key: the name under which each worker's statistics are nested
    :return: ``{worker: {key: stats}}`` where ``stats`` holds "interval_sec",
        "num_cores" (variant cardinality of the "cpu" category) and the
        min/max/avg of the known metrics, each only when reported
    """
    report_interval_hours = config.get(
        "apiserver.statistics.report_interval_hours", 24
    )
    agent_resource_threshold_sec = timedelta(
        hours=report_interval_hours
    ).total_seconds()
    to_timestamp = int(time.time())
    from_timestamp = to_timestamp - int(agent_resource_threshold_sec)

    categories_agg = {
        "terms": {"field": "category"},
        "aggs": {"count": {"cardinality": {"field": "variant"}}},
    }
    metrics_agg = {
        "terms": {"field": "metric"},
        "aggs": {
            "min": {"min": {"field": "value"}},
            "max": {"max": {"field": "value"}},
            "avg": {"avg": {"field": "value"}},
        },
    }
    es_req = {
        "size": 0,
        "query": QueryBuilder.dates_range(from_timestamp, to_timestamp),
        "aggs": {
            "workers": {
                "terms": {"field": "worker"},
                "aggs": {"categories": categories_agg, "metrics": metrics_agg},
            }
        },
    }
    res = cls._run_worker_stats_query(company_id, es_req)

    def _cardinality_fields(categories: Sequence[dict]) -> dict:
        # Keep only known categories, stored under their output field name
        field_by_category = {"cpu": "num_cores"}
        collected = {}
        for category in categories:
            if category["key"] in field_by_category:
                target = field_by_category[category["key"]]
                collected[target] = safe_get(category, "count/value")
        return collected

    def _metric_fields(metrics: Sequence[dict]) -> dict:
        # Keep only known metrics, stored under their output field name
        field_by_metric = {
            "cpu_usage": "cpu_usage",
            "memory_used": "mem_used_gb",
            "memory_free": "mem_free_gb",
        }
        collected = {}
        for metric in metrics:
            if metric["key"] in field_by_metric:
                collected[field_by_metric[metric["key"]]] = {
                    "min": safe_get(metric, "min/value"),
                    "max": safe_get(metric, "max/value"),
                    "avg": safe_get(metric, "avg/value"),
                }
        return collected

    stats_per_agent = {}
    for bucket in safe_get(res, "aggregations/workers/buckets", default=[]):
        agent_stats = {"interval_sec": agent_resource_threshold_sec}
        agent_stats.update(
            _cardinality_fields(safe_get(bucket, "categories/buckets", []))
        )
        agent_stats.update(_metric_fields(safe_get(bucket, "metrics/buckets", [])))
        stats_per_agent[bucket["key"]] = {key: agent_stats}
    return stats_per_agent