def __call__(self, **kwargs):
    """Build the data for the API-call report.

    Fetches every analytics record that has an ``api.name`` field and sums
    each record's ``counter`` value per ``system`` and per API name.

    Returns:
        A 2-tuple of the string ``"analytics_report_api"`` (presumably the
        name of the page/template to render -- confirm against the caller)
        and a dict whose ``"api_plot"`` entry describes the plot.
    """
    collection = get_analytics_interface().col

    # system -> API name -> accumulated counter value
    calls_by_system = collections.defaultdict(
        lambda: collections.defaultdict(int))

    matching = list(collection.find({"api.name": {"$exists": True}}))
    for entry in matching:
        system = entry['system']
        api_name = entry['api']['name']
        calls_by_system[system][api_name] += entry['counter']

    api_plot = {
        "central_label": False,
        "data": nested_to_baobab(calls_by_system),
        "external": False,
        "title": "API Calls",
    }
    return ("analytics_report_api", {"api_plot": api_plot})
def __call__(self, **kwargs):
    """Build the data for the query-analyzer report.

    Keyword arguments read from ``kwargs``:
        period: length in seconds of the reporting window that ends now;
            converted with ``int`` (default: 7 days).
        key: optional; when truthy, only queries whose ``keys`` contain it
            are counted.

    Each summary returned by the analytics interface is read for its
    ``start``, ``finish`` and ``queries`` entries; each query is indexed as
    ``(structure, count)`` where ``structure`` provides ``keys`` (a mapping
    of key to constraint value), ``fields`` and ``instance``.

    Returns:
        A 2-tuple of the string ``"analytics_report_query"`` and a dict
        with the totals, the set of keys seen, the most used key and the
        plot descriptions (time, constraint, field, instance and key).
    """
    period = int(kwargs.get('period', 7 * 86400))
    view_key = kwargs.get('key', None)
    now = time.time()
    window_start = now - period
    analytics = get_analytics_interface()
    analyzer_exists = any(
        task['classname'] == "QueryAnalyzer"
        for task in self.scheduler.get_registry().values())
    summaries = analytics.get_summary(identifier="query_analyzer",
                                      after=window_start)
    # each summary['queries'] is [ (query_structure, count) ]

    count_by_key = collections.defaultdict(int)
    field_count_by_key = collections.defaultdict(
        lambda: collections.defaultdict(int))
    instance_count = collections.defaultdict(int)
    constraint_by_key = collections.defaultdict(
        lambda: collections.defaultdict(int))
    seen_keys = set()

    # One bin per hour, capped at max_series_length.  Floor division keeps
    # the bin count an int on Python 3 as well, and the lower bound of 1
    # avoids dividing by zero for periods shorter than an hour.
    time_bins = max(1, min(period // 3600, self.max_series_length))
    time_interval = float(period) / time_bins
    time_series = collections.defaultdict(lambda: [0] * (time_bins + 1))

    total_queries = 0
    for summary in summaries:
        midtime = 0.5 * (summary['start'] + summary['finish'])
        if time_interval > 0:
            time_bin = int((midtime - window_start) / time_interval)
            # Clamp so a midpoint outside the window cannot raise
            # IndexError or wrap around through a negative index.
            time_bin = min(max(time_bin, 0), time_bins)
        else:
            # Non-positive period: there is no meaningful binning.
            time_bin = 0

        for query in summary['queries']:
            structure = query[0]
            count = query[1]
            keys = structure['keys']
            if view_key and view_key not in keys:
                continue
            total_queries += count
            for key in keys:
                seen_keys.add(key)
                constraint_by_key[key][keys[key]] += count
                count_by_key[key] += count
                for field in structure['fields']:
                    field_count_by_key[key][field] += count
                time_series[key][time_bin] += count
            instance_count[structure['instance']] += count

    time_plot = dict(legend="topleft",
                     series=[dict(label=key, values=time_series[key])
                             for key in time_series],
                     title="Calls by time",
                     xaxis=dict(bins=time_bins + 1,
                                min=window_start,
                                width=time_interval,
                                label="Time",
                                format="time"),
                     yaxis=dict(label="Queries"))
    constraint_plot = dict(central_label=False,
                           data=nested_to_baobab(constraint_by_key),
                           external=False,
                           title="Constraint by key")
    field_plot = dict(central_label=False,
                      data=nested_to_baobab(field_count_by_key),
                      external=False,
                      title="Field by key")
    instance_plot = dict(labels=True,
                         percentage=True,
                         series=[{'label': instance,
                                  'value': instance_count[instance]}
                                 for instance in instance_count],
                         title="DBS Instance")
    key_plot = dict(labels=True,
                    percentage=True,
                    series=[{'label': key, 'value': count_by_key[key]}
                            for key in count_by_key],
                    title="Key(s) used")

    # sorted(...)[-1] rather than max(): on ties it picks the last key in
    # iteration order, which is what this report has always returned.
    popular_key = None
    if count_by_key:
        popular_key = sorted(count_by_key, key=count_by_key.get)[-1]

    return ("analytics_report_query",
            {"nsummaries": len(summaries),
             "nqueries": total_queries,
             "view_key": view_key,
             "seen_keys": seen_keys,
             "analyzer_exists": analyzer_exists,
             "constraint_plot": constraint_plot,
             "field_plot": field_plot,
             "instance_plot": instance_plot,
             "key_plot": key_plot,
             "time_plot": time_plot,
             "period": period,
             "popular_key": popular_key})
def hotspot_report(self, **kwargs):
    """Hotspot report.

    Keyword arguments read from ``kwargs``:
        identifier: required; selects the summaries to load and is matched
            against ``gen_identifier(task)`` for registered tasks whose
            class name contains ``'Hotspot'``.
        period: optional window length in seconds ending now. Defaults to
            the matching task's ``period`` kwarg, else 30 days.
        fraction: optional share of all calls that the ``selected`` keys
            should cover. Defaults to the matching task's ``fraction``
            kwarg, else 0.15.

    Each summary is read for its ``start``, ``finish`` and ``keys``
    entries.

    Returns:
        A 2-tuple of the string ``"analytics_report_hotspot"`` and a dict
        with the matching task (or None), the totals, three plot
        descriptions and the list of selected keys.

    Raises:
        KeyError: if ``identifier`` is not supplied.
    """
    analytics = get_analytics_interface()
    identifier = kwargs['identifier']
    taskdicts = [task for task in self.scheduler.get_registry().values()
                 if 'Hotspot' in task['classname']
                 and gen_identifier(task) == identifier]
    taskobj = taskdicts[0] if taskdicts else None

    # Built-in defaults, overridden by the task and then by the request.
    period = 86400 * 30
    interval = 3600 * 4
    fraction = 0.15
    if taskobj:
        period = taskobj['kwargs'].get('period', period)
        interval = taskobj['interval']
        fraction = taskobj['kwargs'].get('fraction', fraction)
    period = int(kwargs.get('period', period))
    fraction = float(kwargs.get('fraction', fraction))

    # Take a single timestamp so the window is exactly ``period`` long.
    epoch_end = time.time()
    epoch_start = epoch_end - period

    summaries = analytics.get_summary(identifier,
                                      after=epoch_start,
                                      before=epoch_end)

    # Explicit loop instead of map(): map() is lazy on Python 3, so using
    # it only for its side effect would leave the counter empty.
    counter = collections.defaultdict(int)
    for summary in summaries:
        # NOTE(review): dict.update() overwrites the value of a key seen in
        # an earlier summary instead of adding to it. If summary['keys']
        # maps key -> call count, per-key totals across summaries probably
        # ought to be summed -- confirm against the summary producer.
        counter.update(summary['keys'])

    sorted_keys = sorted(counter, key=counter.get)
    total_calls = float(sum(counter.values()))

    # Running total of calls, keys ordered from least to most called.
    key_series = []
    call_count = 0
    for key in sorted_keys:
        call_count += counter[key]
        key_series.append(call_count)

    binning = 1
    if len(key_series) > self.max_series_length:
        # Ceiling division so the sampled series never exceeds
        # max_series_length (and stays an int on Python 3).
        binning = -(-len(key_series) // self.max_series_length)
        # Start at (len - 1) % binning so the stride always ends on the
        # final cumulative value; the former ``len % binning - 1`` became
        # -1 for evenly divisible lengths and kept only the last point.
        key_series = key_series[(len(key_series) - 1) % binning::binning]

    key_plot = dict(legend="null",
                    series=[dict(colour="#ff0000",
                                 label="Calls",
                                 values=key_series)],
                    title="Cumulative calls by key",
                    xaxis=dict(bins=len(key_series) - 1,
                               label="Keys",
                               width=binning,
                               min=0),
                    yaxis=dict(label="Cumulative calls"))

    summary_durations = [item['finish'] - item['start']
                         for item in summaries]
    summary_density = [len(item['keys']) for item in summaries]
    summary_plot = dict(legend="null",
                        series=[dict(colour="#ff0000",
                                     label="Summaries",
                                     marker="*",
                                     x=summary_durations,
                                     y=summary_density)],
                        title="Summary length and call count",
                        xaxis=dict(label="Summary length"),
                        yaxis=dict(label="Number of calls"))

    time_bins = max(0, int((epoch_end - epoch_start) / interval))
    time_interval = interval
    if time_bins > self.max_series_length:
        time_bins = self.max_series_length
        time_interval = (epoch_end - epoch_start) / time_bins
    time_series = [0] * (time_bins + 1)
    for summary in summaries:
        # Use the summary being iterated; the previous code read a stale
        # name (``s``) left over from the comprehensions above.
        midpoint = 0.5 * (summary['finish'] + summary['start'])
        slot = int((midpoint - epoch_start) / time_interval)
        # Clamp so a midpoint outside the window cannot raise IndexError
        # or wrap around through a negative index.
        slot = min(max(slot, 0), time_bins)
        time_series[slot] += len(summary['keys'])

    time_plot = dict(legend="null",
                     series=[dict(colour="#ff0000",
                                  label="Query density over time",
                                  values=time_series)],
                     title="Query density over time",
                     xaxis=dict(bins=time_bins + 1,
                                label="Time",
                                format="time",
                                min=epoch_start,
                                width=time_interval),
                     yaxis=dict(label="Calls"))

    # Take the most-called keys until they cover ``fraction`` of all calls.
    selected = []
    cumulative = 0
    while sorted_keys and cumulative < fraction * total_calls:
        key = sorted_keys.pop()
        cumulative += counter[key]
        selected.append(key)

    return ("analytics_report_hotspot",
            {"list": False,
             "identifier": identifier,
             "task": taskobj,
             "nkeys": len(counter),
             "ncalls": total_calls,
             "nsummaries": len(summaries),
             "key_plot": key_plot,
             "summary_plot": summary_plot,
             "time_plot": time_plot,
             "selected": selected})