def show_core_locality(df, task_re, label):
    """Plot which core each matching task ran on over time.

    Only 'sched__sched_switch' events are considered. For every task a thick
    horizontal segment is drawn for each run period and a translucent disc
    marks the end of each run. The resulting figure is handed to
    output_html().

    :param df: trace DataFrame; the columns read here are 'event',
               'task_name', 'usecs', 'duration', 'cpu' and 'pid'
    :param task_re: regular expression applied to the task names with
                    pandas' str.match
    :param label: free text inserted in the chart title
    """
    df = df[df['event'] == 'sched__sched_switch']
    # take a private copy of the selection: the task_name column is rewritten
    # below and writing into a filtered slice triggers pandas'
    # SettingWithCopyWarning
    df = df[df['task_name'].str.match(task_re)].copy()
    # aggregate all the per core tasks (e.g. swapper/0 -> swapper)
    df['task_name'] = df['task_name'].str.replace(r'/.*$', '')
    # group by task name
    gb = df.groupby('task_name')
    task_list = gb.groups

    p = figure(plot_width=1000, plot_height=800, **title_style)
    p.xaxis.axis_label = 'time (usecs)'
    p.yaxis.axis_label = 'core'
    p.legend.orientation = "bottom_right"
    p.xaxis.axis_label_text_font_size = "10pt"
    p.yaxis.axis_label_text_font_size = "10pt"
    p.title = "Core locality (%s)" % (label)

    color_list = cycle_colors(task_list)

    for task, color in zip(task_list, color_list):
        # copy the group before adding a column to it
        dfe = gb.get_group(task).copy()
        # add 1 column to contain the starting time for each run period
        dfe['start'] = dfe['usecs'] - dfe['duration']
        tid = dfe['pid'].iloc[0]
        count = len(dfe)
        legend_text = '%s:%d (%d)' % (task, tid, count)
        # both glyphs read the same rows: build the data source once and
        # share it instead of serializing the data twice
        source = ColumnDataSource(dfe)
        # draw end of runs
        p.circle('usecs', 'cpu',
                 source=source,
                 size=get_disc_size(count) + 2,
                 color=color,
                 alpha=0.3,
                 legend=legend_text)
        # draw segments to show the entire runs
        p.segment('start', 'cpu', 'usecs', 'cpu',
                  line_width=5,
                  line_color=color,
                  source=source)

    # specify how to output the plot(s)
    output_html(p, 'coreloc', task_re)
def show_core_locality(df, task_re, label):
    """Plot which core each matching task ran on over time and display it.

    Only 'sched__sched_switch' events are considered. For every task a thick
    horizontal segment is drawn for each run period and a translucent disc
    marks the end of each run. The output is configured through
    output_html() and the figure is then displayed with show().

    :param df: trace DataFrame; the columns read here are 'event',
               'task_name', 'usecs', 'duration', 'cpu' and 'pid'
    :param task_re: regular expression applied to the task names with
                    pandas' str.match
    :param label: free text inserted in the chart title
    """
    df = df[df['event'] == 'sched__sched_switch']
    # take a private copy of the selection: the task_name column is rewritten
    # below and writing into a filtered slice triggers pandas'
    # SettingWithCopyWarning
    df = df[df['task_name'].str.match(task_re)].copy()
    # aggregate all the per core tasks (e.g. swapper/0 -> swapper)
    df['task_name'] = df['task_name'].str.replace(r'/.*$', '')
    # group by task name
    gb = df.groupby('task_name')
    task_list = gb.groups

    p = figure(plot_width=1000, plot_height=800, **title_style)
    p.xaxis.axis_label = 'time (usecs)'
    p.yaxis.axis_label = 'core'
    p.legend.orientation = "bottom_right"
    p.xaxis.axis_label_text_font_size = "10pt"
    p.yaxis.axis_label_text_font_size = "10pt"
    p.title = "Core locality (%s)" % (label)

    color_list = cycle_colors(task_list)

    for task, color in zip(task_list, color_list):
        # copy the group before adding a column to it
        dfe = gb.get_group(task).copy()
        # add 1 column to contain the starting time for each run period
        dfe['start'] = dfe['usecs'] - dfe['duration']
        tid = dfe['pid'].iloc[0]
        count = len(dfe)
        legend_text = '%s:%d (%d)' % (task, tid, count)
        # both glyphs read the same rows: build the data source once and
        # share it instead of serializing the data twice
        source = ColumnDataSource(dfe)
        # draw end of runs
        p.circle('usecs', 'cpu',
                 source=source,
                 size=get_disc_size(count) + 2,
                 color=color,
                 alpha=0.3,
                 legend=legend_text)
        # draw segments to show the entire runs
        p.segment('start', 'cpu', 'usecs', 'cpu',
                  line_width=5,
                  line_color=color,
                  source=source)

    # specify how to output the plot(s)
    output_html('coreloc', task_re)
    # display the figure
    show(p)
def show_sw_kvm_heatmap(df, task_re, label, show_ctx_switches, show_kvm):
    """Draw one scatter chart of event durations per selected task.

    Each chart plots event time (x) against event duration (y, log scale).
    Context switch events are drawn as circles and KVM events as diamonds.
    All charts share the same X and Y ranges. A single chart is shown alone,
    several charts are laid out in a grid with 2 charts per row.

    :param df: trace DataFrame passed to get_groupby(); the columns read
               here are 'event', 'duration' and 'usecs'
    :param task_re: task selection expression passed to get_groupby()
    :param label: free text added to the title of the first chart only
    :param show_ctx_switches: combined with show_kvm to plot both families
    :param show_kvm: plot the KVM events; when false the scheduler events
                     are plotted regardless of show_ctx_switches
    """
    gb = get_groupby(df, task_re)

    chart_list = []
    # each entry maps an event name to (color, legend text, is context switch)
    # these are the 2 main events to show for kvm events
    legend_map_kvm = {
        'kvm_exit': (BLUE, 'vcpu running (y=vcpu run time)', False),
        'kvm_entry': (ORANGE, 'vcpu not running (y=kvm+sleep time)', False)
    }
    # context switch events
    legend_map_ctx_sw = {
        'sched__sched_stat_sleep': (RED, 'wakeup from sleep (y=sleep time)', True),
        'sched__sched_switch': (GREEN, 'switched out from cpu (y=run time)', True)
    }
    if show_kvm and show_ctx_switches:
        # merge without relying on items() returning lists (python 2 only)
        legend_map = dict(legend_map_kvm)
        legend_map.update(legend_map_ctx_sw)
        title = "Scheduler and KVM events"
        prefix = 'swkvm'
    elif show_kvm:
        legend_map = legend_map_kvm
        title = "KVM events"
        prefix = 'kvm'
    else:
        legend_map = legend_map_ctx_sw
        title = "Scheduler events"
        prefix = 'sw'

    width = 1000
    height = 800
    nb_charts = len(gb.groups)
    if nb_charts == 0:
        print('No selection matching: ' + task_re)
        return
    if nb_charts > 1:
        # explicit integer division keeps the pixel sizes integral
        width //= 2
        height //= 2
        tstyle = grid_title_style
    else:
        tstyle = title_style

    task_list = sorted(gb.groups.keys())
    event_list = sorted(legend_map.keys())
    # only the first chart gets the full legend text
    show_legend = True
    # running extrema used to build the ranges shared by all charts
    duration_max = usecs_max = -1
    duration_min = usecs_min = sys.maxsize

    for task in task_list:
        p = figure(plot_width=width, plot_height=height, y_axis_type="log", **tstyle)
        p.xaxis.axis_label = 'time (usecs)'
        p.yaxis.axis_label = 'duration (usecs)'
        p.legend.orientation = "bottom_right"
        p.xaxis.axis_label_text_font_size = "10pt"
        p.yaxis.axis_label_text_font_size = "10pt"
        if label:
            p.title = "%s for %s (%s)" % (title, task, label)
            # the label is only displayed on the first chart
            label = None
        else:
            p.title = task
        p.ygrid.minor_grid_line_color = 'navy'
        p.ygrid.minor_grid_line_alpha = 0.1

        dfg = gb.get_group(task)
        # remove any row with zero duration as it confuses the chart library
        dfg = dfg[dfg['duration'] > 0]

        for event in event_list:
            dfe = dfg[dfg.event == event]
            duration_min = min(duration_min, dfe['duration'].min())
            duration_max = max(duration_max, dfe['duration'].max())
            usecs_min = min(usecs_min, dfe['usecs'].min())
            usecs_max = max(usecs_max, dfe['usecs'].max())
            # the legend reports the full count, even if the series is
            # truncated below
            count = len(dfe)
            color, legend_text, cx_sw = legend_map[event]
            if show_legend:
                legend_text = '%s (%d)' % (legend_text, count)
            elif color == GREEN:
                legend_text = '(%d)' % (count)
            else:
                legend_text = None

            # there is bug in bokeh when there are too many circles to draw, nothing is visible
            if count > 50000:
                dfe = dfe[:50000]
                print('Series for %s display truncated to 50000 events' % (event))
            if cx_sw:
                draw_shape = p.circle
                size = get_disc_size(count)
            else:
                draw_shape = p.diamond
                size = get_disc_size(count) + 4
            draw_shape('usecs', 'duration',
                       source=ColumnDataSource(dfe),
                       size=size,
                       color=color,
                       alpha=0.3,
                       legend=legend_text)
        chart_list.append(p)
        show_legend = False

    # all charts use the same ranges so they can be compared side by side
    shared_x_range = Range1d(usecs_min, usecs_max)
    shared_y_range = Range1d(duration_min, duration_max)
    for p in chart_list:
        p.x_range = shared_x_range
        p.y_range = shared_y_range

    # specify how to output the plot(s)
    output_html(prefix, task_re)

    # display the figure
    if len(chart_list) == 1:
        show(chart_list[0])
    else:
        # split the list into an array of rows with 2 charts per row
        gp = gridplot(split_list(chart_list, 2))
        show(gp)
def show_core_runs(df, task_re, label, duration):
    """Draw a task x core heat map of run time or of context switch counts.

    Only 'sched__sched_switch' events of the tasks matching task_re are used.
    The X axis lists the cores (rounded up to a multiple of 4), the Y axis
    lists the task names, and a color legend is drawn to the right of the
    map. The figure is handed to output_html().

    :param df: trace DataFrame; the columns read here are 'next_pid', 'pid',
               'usecs', 'next_comm', 'event', 'task_name', 'cpu' and
               'duration'
    :param task_re: regular expression applied to the task names with
                    pandas' str.match
    :param label: free text inserted in the chart title
    :param duration: when true each cell shows the run time of the task on
                     that core as a percentage of the capture window and an
                     extra 'IDLE' column is added; when false each cell
                     shows the number of context switches
    """
    time_span_msec = get_time_span_msec(df)

    # remove unneeded columns (drop returns a new frame)
    df = df.drop(['next_pid', 'pid', 'usecs', 'next_comm'], axis=1)

    # filter out all events except the switch events
    df = df[df.event == 'sched__sched_switch']
    df = df.drop('event', axis=1)
    df = df[df['task_name'].str.match(task_re)]
    # at this point we have a df that looks like this:
    #          task_name  cpu  duration
    # 0      ASA.1.vcpu0    8      7954
    # 1      ASA.1.vcpu0    9      5475
    # 2      ASA.1.vcpu0   10      4151
    # 3      ASA.1.vcpu0   11     12391
    # 4     ASA.10.vcpu0    8      4248
    # 5     ASA.10.vcpu0    9      3534
    # etc...
    if df.empty:
        print('')
        print('No selection matching "%s"' % (task_re))
        return

    if duration:
        # add duration values per (task, core)
        df = df.groupby(['task_name', 'cpu'], as_index=False).aggregate(np.sum)
        max_core = df.cpu.max()
        dfsum = df.drop('cpu', axis=1)
        dfsum = dfsum.groupby('task_name', as_index=False).aggregate(np.sum)
        # dfsum is the sum of all duration for given task
        # 0    ASA.1.vcpu0   78152
        # 1   ASA.10.vcpu0   65637
        # 2   ASA.11.vcpu0   81525
        # For each task, the maximum runtime is the time_span_msec (100% of 1 core)
        # The idle time for each task is therefore time_span_msec * 1000 - sum(duration)
        dfsum['cpu'] = 'IDLE'
        time_span_usec = time_span_msec * 1000
        dfsum['duration'] = time_span_usec - dfsum['duration']
        # now we need to reinsert that data back to the df:
        # this adds 1 'IDLE' row per task, shown as an extra heat map column
        dfm = pandas.concat([df, dfsum], ignore_index=True)
        # Add a % column, computed in tenths of a percent of the window
        dfm['percent'] = ((dfm['duration'] * 100 * 10) // time_span_usec) / 10
        # This is for the legend
        min_count = 0
        max_count = 100
        range_unit = '%'
        # many core-pinned system tasks have a duration of 0 (swapper, watchdog...)
        dfm.fillna(100, inplace=True)
        dfm.drop(['duration'], axis=1, inplace=True)
        tooltip_count = ("time", "@percent% of core @cpu")
        title = "Task Run Time %% per Core (%s, %d msec window)" % (label, time_span_msec)
        # this is YlGnBu9[::-1] (Reverse the color order so dark is highest value)
        # with an extra intermediate color to make it 10
        palette = ['#ffffd9', '#edf8b1', '#c7e9b4', '#a3dbb7', '#7fcdbb',
                   '#41b6c4', '#1d91c0', '#225ea8', '#253494', '#081d58']
        html_prefix = 'core_runtime'
    else:
        # count number of rows with same task and cpu; naming the column
        # through reset_index does not depend on the label pandas gives to
        # the result of size()
        dfm = df.groupby(['task_name', 'cpu']).size().reset_index(name='count')
        min_count = dfm['count'].min()
        max_count = dfm['count'].max()
        range_unit = ''
        spread = max_count - min_count
        # Add a % column
        if spread:
            dfm['percent'] = ((dfm['count'] - min_count) * 100) / spread
        else:
            # all cells have the same count (e.g. a single task on a single
            # core): dividing by a zero spread would yield NaN, so treat
            # every cell as being at the maximum
            dfm['percent'] = 100.0
        tooltip_count = ("context switches", "@count")
        title = "Task Context Switches per Core (%s, %d msec window)" % (label, time_span_msec)
        palette = YlOrRd9[::-1]
        html_prefix = 'core_switches'
        max_core = dfm.cpu.max()

    # get the list of cores
    # round up to next mutliple of 4 - 1
    # 0..3 -> 3
    # 4..7 -> 7 etc
    max_core |= 0x03
    max_core = max(max_core, 3)
    core_list = [str(x) for x in range(max_core + 1)]
    if duration:
        core_list.append('IDLE')
    # make room for the legend by adding 3 empty columns
    core_list += ['', '', '']

    dfm['cpu'] = dfm['cpu'].astype(str)
    # replace ':' with '_' as it would cause bokeh to misplace the labels on the chart
    dfm['task_name'] = dfm['task_name'].str.replace(':', '_')
    normalize_df_task_name(dfm)

    # Get a unique list of task names sorted, used as the Y axis categories
    task_list = sorted(dfm['task_name'].unique().tolist())

    # Add a color column
    dfm['color'] = dfm.apply(lambda row: get_color(row['percent'], palette), axis=1)
    # switch to str type to prevent the tooltip to
    # display percent value with 3 digits
    dfm.percent = dfm.percent.astype(str)

    # make enough vertical space for the legend
    # the legend needs 1 row per palette color + 1 to fit the max value
    # so we need at least len(palette) + 1 rows
    missing_rows = len(palette) + 1 - len(task_list)
    if missing_rows > 0:
        task_list += [''] * missing_rows

    TOOLS = "resize,hover,save"
    p = figure(title=title, tools=TOOLS, x_range=core_list, y_range=task_list, **title_style)
    p.plot_width = 1000
    p.plot_height = 80 + len(task_list) * 16
    p.toolbar_location = "left"
    source = ColumnDataSource(dfm)
    # the name is to flag these rectangles to enable tooltip hover on them
    # (and not enable tooltips on the legend patches)
    p.rect("cpu", "task_name", width=1, height=0.9, source=source,
           fill_alpha=0.8, color="color", name='patches')
    p.grid.grid_line_color = None

    # trace separator lines to isolate blocks across core groups (numa sockets) and task-like names
    max_y = len(task_list)
    # trace a vertical line every 8 cores
    for seg_x in range(8, max_core + 7, 8):
        p.segment(x0=[seg_x + 0.5], y0=[0], x1=[seg_x + 0.5],
                  y1=[max_y + 0.5], color=GRAY, line_width=2)
    # trace a horizontal line around every group of tasks that have the same first 3 characters
    prev_task_name = None
    for y, cur_task_name in enumerate(task_list):
        # an empty previous name (padding row) never starts a new group
        if prev_task_name and prev_task_name[0:3] != cur_task_name[0:3]:
            p.segment(x0=[0], y0=[y + 0.5], x1=[max_core + 1.5],
                      y1=[y + 0.5], color=GRAY, line_width=0.5)
        prev_task_name = cur_task_name

    hover = p.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("task", "@task_name"),
        ("core", "@cpu"),
        tooltip_count
    ])
    # only enable tooltip on rectangles with name 'patches'
    hover.names = ['patches']

    # legend to the right
    # we try to center the legend vertically
    legend_base_y = 0
    palette_len = len(palette)
    if max_y > palette_len + 1:
        legend_base_y = (max_y - palette_len - 1) // 2
    if duration:
        # IDLE cpu inserted so shift the legend by 1 position to the right
        max_core += 1
    # pass 1 is to draw the color patches
    # prepare a data source with a x, y and a color column
    x_values = np.full(palette_len, max_core + 2.5)
    y_values = np.arange(legend_base_y + 1.5, legend_base_y + palette_len + 1, 1)
    dfl = DataFrame({'x': x_values, 'y': y_values, 'color': palette})
    source = ColumnDataSource(dfl)
    p.rect(x='x', y='y', color='color', width=1, height=1, source=source)

    # pass 2 is to draw the text describing the ranges for the color patches
    color_value_list = get_color_value_list(min_count, max_count, palette, range_unit)
    x_values = np.full(len(color_value_list), max_core + 3.1)
    y_values = np.arange(legend_base_y + 0.7, legend_base_y + len(color_value_list), 1)
    dfl = DataFrame({'x': x_values, 'y': y_values, 'color_values': color_value_list})
    source = ColumnDataSource(dfl)
    p.text(x='x', y='y', text='color_values', source=source, text_font_size='8pt')

    output_html(p, html_prefix, task_re)
def show_core_runs(df, task_re, label, duration):
    """Draw a task x core heat map of run time or of context switch counts.

    Only 'sched__sched_switch' events of the tasks matching task_re are used.
    The X axis lists the cores (rounded up to a multiple of 4), the Y axis
    lists the task names, and a color legend is drawn to the right of the
    map. The figure is handed to output_html().

    :param df: trace DataFrame; the columns read here are 'next_pid', 'pid',
               'usecs', 'next_comm', 'event', 'task_name', 'cpu' and
               'duration'
    :param task_re: regular expression applied to the task names with
                    pandas' str.match
    :param label: free text inserted in the chart title
    :param duration: when true each cell shows the run time of the task on
                     that core as a percentage of the capture window and an
                     extra 'IDLE' column is added; when false each cell
                     shows the number of context switches
    """
    time_span_msec = get_time_span_msec(df)

    # remove unneeded columns (drop returns a new frame)
    df = df.drop(['next_pid', 'pid', 'usecs', 'next_comm'], axis=1)

    # filter out all events except the switch events
    df = df[df.event == 'sched__sched_switch']
    df = df.drop('event', axis=1)
    df = df[df['task_name'].str.match(task_re)]
    # at this point we have a df that looks like this:
    #          task_name  cpu  duration
    # 0      ASA.1.vcpu0    8      7954
    # 1      ASA.1.vcpu0    9      5475
    # 2      ASA.1.vcpu0   10      4151
    # 3      ASA.1.vcpu0   11     12391
    # 4     ASA.10.vcpu0    8      4248
    # 5     ASA.10.vcpu0    9      3534
    # etc...
    if df.empty:
        print('')
        print('No selection matching "%s"' % (task_re))
        return

    if duration:
        # add duration values per (task, core)
        df = df.groupby(['task_name', 'cpu'], as_index=False).aggregate(np.sum)
        max_core = df.cpu.max()
        dfsum = df.drop('cpu', axis=1)
        dfsum = dfsum.groupby('task_name', as_index=False).aggregate(np.sum)
        # dfsum is the sum of all duration for given task
        # 0    ASA.1.vcpu0   78152
        # 1   ASA.10.vcpu0   65637
        # 2   ASA.11.vcpu0   81525
        # For each task, the maximum runtime is the time_span_msec (100% of 1 core)
        # The idle time for each task is therefore time_span_msec * 1000 - sum(duration)
        dfsum['cpu'] = 'IDLE'
        time_span_usec = time_span_msec * 1000
        dfsum['duration'] = time_span_usec - dfsum['duration']
        # now we need to reinsert that data back to the df:
        # this adds 1 'IDLE' row per task, shown as an extra heat map column
        dfm = pandas.concat([df, dfsum], ignore_index=True)
        # Add a % column, computed in tenths of a percent of the window
        dfm['percent'] = ((dfm['duration'] * 100 * 10) // time_span_usec) / 10
        # This is for the legend
        min_count = 0
        max_count = 100
        range_unit = '%'
        # many core-pinned system tasks have a duration of 0 (swapper, watchdog...)
        dfm.fillna(100, inplace=True)
        dfm.drop(['duration'], axis=1, inplace=True)
        tooltip_count = ("time", "@percent% of core @cpu")
        title = "Task Run Time %% per Core (%s, %d msec window)" % (
            label, time_span_msec)
        # this is YlGnBu9[::-1] (Reverse the color order so dark is highest value)
        # with an extra intermediate color to make it 10
        palette = [
            '#ffffd9', '#edf8b1', '#c7e9b4', '#a3dbb7', '#7fcdbb',
            '#41b6c4', '#1d91c0', '#225ea8', '#253494', '#081d58'
        ]
        html_prefix = 'core_runtime'
    else:
        # count number of rows with same task and cpu; naming the column
        # through reset_index does not depend on the label pandas gives to
        # the result of size()
        dfm = df.groupby(['task_name', 'cpu']).size().reset_index(name='count')
        min_count = dfm['count'].min()
        max_count = dfm['count'].max()
        range_unit = ''
        spread = max_count - min_count
        # Add a % column
        if spread:
            dfm['percent'] = ((dfm['count'] - min_count) * 100) / spread
        else:
            # all cells have the same count (e.g. a single task on a single
            # core): dividing by a zero spread would yield NaN, so treat
            # every cell as being at the maximum
            dfm['percent'] = 100.0
        tooltip_count = ("context switches", "@count")
        title = "Task Context Switches per Core (%s, %d msec window)" % (
            label, time_span_msec)
        palette = YlOrRd9[::-1]
        html_prefix = 'core_switches'
        max_core = dfm.cpu.max()

    # get the list of cores
    # round up to next mutliple of 4 - 1
    # 0..3 -> 3
    # 4..7 -> 7 etc
    max_core |= 0x03
    max_core = max(max_core, 3)
    core_list = [str(x) for x in range(max_core + 1)]
    if duration:
        core_list.append('IDLE')
    # make room for the legend by adding 3 empty columns
    core_list += ['', '', '']

    dfm['cpu'] = dfm['cpu'].astype(str)
    # replace ':' with '_' as it would cause bokeh to misplace the labels on the chart
    dfm['task_name'] = dfm['task_name'].str.replace(':', '_')
    normalize_df_task_name(dfm)

    # Get a unique list of task names sorted, used as the Y axis categories
    task_list = sorted(dfm['task_name'].unique().tolist())

    # Add a color column
    dfm['color'] = dfm.apply(lambda row: get_color(row['percent'], palette),
                             axis=1)
    # switch to str type to prevent the tooltip to
    # display percent value with 3 digits
    dfm.percent = dfm.percent.astype(str)

    # make enough vertical space for the legend
    # the legend needs 1 row per palette color + 1 to fit the max value
    # so we need at least len(palette) + 1 rows
    missing_rows = len(palette) + 1 - len(task_list)
    if missing_rows > 0:
        task_list += [''] * missing_rows

    TOOLS = "resize,hover,save"
    p = figure(title=title,
               tools=TOOLS,
               x_range=core_list,
               y_range=task_list,
               **title_style)
    p.plot_width = 1000
    p.plot_height = 80 + len(task_list) * 16
    p.toolbar_location = "left"
    source = ColumnDataSource(dfm)
    # the name is to flag these rectangles to enable tooltip hover on them
    # (and not enable tooltips on the legend patches)
    p.rect("cpu",
           "task_name",
           width=1,
           height=0.9,
           source=source,
           fill_alpha=0.8,
           color="color",
           name='patches')
    p.grid.grid_line_color = None

    # trace separator lines to isolate blocks across core groups (numa sockets) and task-like names
    max_y = len(task_list)
    # trace a vertical line every 8 cores
    for seg_x in range(8, max_core + 7, 8):
        p.segment(x0=[seg_x + 0.5],
                  y0=[0],
                  x1=[seg_x + 0.5],
                  y1=[max_y + 0.5],
                  color=GRAY,
                  line_width=2)
    # trace a horizontal line around every group of tasks that have the same first 3 characters
    prev_task_name = None
    for y, cur_task_name in enumerate(task_list):
        # an empty previous name (padding row) never starts a new group
        if prev_task_name and prev_task_name[0:3] != cur_task_name[0:3]:
            p.segment(x0=[0],
                      y0=[y + 0.5],
                      x1=[max_core + 1.5],
                      y1=[y + 0.5],
                      color=GRAY,
                      line_width=0.5)
        prev_task_name = cur_task_name

    hover = p.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([("task", "@task_name"),
                                  ("core", "@cpu"),
                                  tooltip_count])
    # only enable tooltip on rectangles with name 'patches'
    hover.names = ['patches']

    # legend to the right
    # we try to center the legend vertically
    legend_base_y = 0
    palette_len = len(palette)
    if max_y > palette_len + 1:
        legend_base_y = (max_y - palette_len - 1) // 2
    if duration:
        # IDLE cpu inserted so shift the legend by 1 position to the right
        max_core += 1
    # pass 1 is to draw the color patches
    # prepare a data source with a x, y and a color column
    x_values = np.full(palette_len, max_core + 2.5)
    y_values = np.arange(legend_base_y + 1.5,
                         legend_base_y + palette_len + 1, 1)
    dfl = DataFrame({'x': x_values, 'y': y_values, 'color': palette})
    source = ColumnDataSource(dfl)
    p.rect(x='x', y='y', color='color', width=1, height=1, source=source)

    # pass 2 is to draw the text describing the ranges for the color patches
    color_value_list = get_color_value_list(min_count, max_count, palette,
                                            range_unit)
    x_values = np.full(len(color_value_list), max_core + 3.1)
    y_values = np.arange(legend_base_y + 0.7,
                         legend_base_y + len(color_value_list), 1)
    dfl = DataFrame({
        'x': x_values,
        'y': y_values,
        'color_values': color_value_list
    })
    source = ColumnDataSource(dfl)
    p.text(x='x',
           y='y',
           text='color_values',
           source=source,
           text_font_size='8pt')

    output_html(p, html_prefix, task_re)
def show_kvm_exit_types(df, task_re, label):
    """Draw a stacked bar chart and a table of KVM exit counts per task.

    Only 'kvm_exit' events of the tasks matching task_re are used. The bar
    chart stacks the exit counts of each task by exit reason; the table has
    one row per task, one column per exit reason and a 'TOTAL' column. Both
    are stacked vertically and handed to output_html().

    :param df: trace DataFrame; the columns read here are 'event',
               'task_name', 'next_comm', 'cpu', 'duration', 'next_pid',
               'pid' and 'usecs'
    :param task_re: regular expression applied to the task names with
                    pandas' str.match
    :param label: free text inserted in the chart title
    """
    df = df[df['event'] == 'kvm_exit']
    # private copy: a column is added below and writing into a filtered
    # slice triggers pandas' SettingWithCopyWarning
    df = df[df['task_name'].str.match(task_re)].copy()
    if df.empty:
        # nothing to chart; pandas.concat() below would raise on an empty list
        print('No selection matching "%s"' % (task_re))
        return

    # the next_comm column contains the exit code
    exit_codes = Series(KVM_EXIT_REASONS)
    # add new column containing the exit reason in clear text
    df['exit_reason'] = df['next_comm'].map(exit_codes)
    time_span_msec = get_time_span_msec(df)
    df = df.drop(['cpu', 'duration', 'event', 'next_pid', 'pid', 'next_comm', 'usecs'],
                 axis=1)

    # group by task name then exit reasons and count the exits of each type
    df = df.groupby(['task_name', 'exit_reason']).size().to_frame('count')
    df.reset_index(inplace=True)

    p = Bar(df, label='task_name', values='count', stack='exit_reason',
            title="KVM Exit types per task (%s, %d msec window)" % (label, time_span_msec),
            legend='top_right',
            tools="resize,hover,save",
            width=1000, height=800)
    p._xaxis.axis_label = "Task Name"
    p._xaxis.axis_label_text_font_size = "12pt"
    p._yaxis.axis_label = "Exit Count (sum)"
    p._yaxis.axis_label_text_font_size = "12pt"

    # NOTE: the exit reason is not shown in the tooltip (no way was found to
    # expose it to the hover tool)
    hover = p.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("task", "$x"),
        ("count", "@height")
    ])

    # table with counts: build 1 single-column frame per exit reason,
    # indexed by task name
    dfr_list = []
    for reason, dfr in df.groupby('exit_reason'):
        # drop the exit reason column
        dfr = dfr.drop(['exit_reason'], axis=1)
        # rename the count column with the reason name
        dfr = dfr.rename(columns={'count': reason})
        # set the task name as the index
        dfr = dfr.set_index('task_name')
        dfr_list.append(dfr)
    # concatenate all reason columns into 1 dataframe that has the task name as the index
    # counts for missing exit reasons will be set to NaN
    dft = pandas.concat(dfr_list, axis=1)
    dft.fillna(0, inplace=True)
    # Add a total column
    dft['TOTAL'] = dft.sum(axis=1)

    sfmt = StringFormatter(text_align='center', font_style='bold')
    nfmt = NumberFormatter(format='0,0')

    # exit reasons sorted alphabetically with 'TOTAL' at end of list
    col_names = sorted(name for name in dft.columns if name != 'TOTAL')
    col_names.append('TOTAL')

    # convert index to a column; naming the index first guarantees the
    # column is called 'Task' whatever name concat gave to the index
    dft.index.name = 'Task'
    dft.reset_index(inplace=True)
    columns = [TableColumn(field=name, title=name, formatter=nfmt) for name in col_names]
    columns.insert(0, TableColumn(field='Task', title='Task', formatter=sfmt))
    table = DataTable(source=ColumnDataSource(dft), columns=columns, width=1000,
                      row_headers=False,
                      height='auto')
    output_html(vplot(p, table), 'kvm-types', task_re)
def show_sw_kvm_heatmap(df, task_re, label, show_ctx_switches, show_kvm):
    """Draw one scatter chart of event durations per selected task.

    Each chart plots event time (x) against event duration (y, log scale).
    Context switch events are drawn as circles and KVM events as diamonds.
    All charts share the same X and Y ranges. A single chart is output
    alone, several charts are laid out in a grid with 2 charts per row; the
    result is handed to output_html().

    :param df: trace DataFrame passed to get_groupby(); the columns read
               here are 'event', 'duration' and 'usecs'
    :param task_re: task selection expression passed to get_groupby()
    :param label: free text added to the title of the first chart only
    :param show_ctx_switches: combined with show_kvm to plot both families
    :param show_kvm: plot the KVM events; when false the scheduler events
                     are plotted regardless of show_ctx_switches
    """
    gb = get_groupby(df, task_re)

    chart_list = []
    # each entry maps an event name to (color, legend text, is context switch)
    # these are the 2 main events to show for kvm events
    legend_map_kvm = {
        'kvm_exit': (BLUE, 'vcpu running (y=vcpu run time)', False),
        'kvm_entry': (ORANGE, 'vcpu not running (y=kvm+sleep time)', False)
    }
    # context switch events
    legend_map_ctx_sw = {
        'sched__sched_stat_sleep': (RED, 'wakeup from sleep (y=sleep time)', True),
        'sched__sched_switch': (GREEN, 'switched out from cpu (y=run time)', True)
    }
    if show_kvm and show_ctx_switches:
        # merge without relying on items() returning lists (python 2 only)
        legend_map = dict(legend_map_kvm)
        legend_map.update(legend_map_ctx_sw)
        title = "Scheduler and KVM events"
        prefix = 'swkvm'
    elif show_kvm:
        legend_map = legend_map_kvm
        title = "KVM events"
        prefix = 'kvm'
    else:
        legend_map = legend_map_ctx_sw
        title = "Scheduler events"
        prefix = 'sw'

    width = 1000
    height = 800
    nb_charts = len(gb.groups)
    if nb_charts == 0:
        print('No selection matching: ' + task_re)
        return
    if nb_charts > 1:
        # explicit integer division keeps the pixel sizes integral
        width //= 2
        height //= 2
        tstyle = grid_title_style
    else:
        tstyle = title_style

    task_list = sorted(gb.groups.keys())
    event_list = sorted(legend_map.keys())
    # only the first chart gets the full legend text
    show_legend = True
    # running extrema used to build the ranges shared by all charts
    duration_max = usecs_max = -1
    duration_min = usecs_min = sys.maxsize

    for task in task_list:
        p = figure(plot_width=width, plot_height=height, y_axis_type="log", **tstyle)
        p.xaxis.axis_label = 'time (usecs)'
        p.yaxis.axis_label = 'duration (usecs)'
        p.legend.orientation = "bottom_right"
        p.xaxis.axis_label_text_font_size = "10pt"
        p.yaxis.axis_label_text_font_size = "10pt"
        if label:
            p.title = "%s for %s (%s)" % (title, task, label)
            # the label is only displayed on the first chart
            label = None
        else:
            p.title = task
        p.ygrid.minor_grid_line_color = 'navy'
        p.ygrid.minor_grid_line_alpha = 0.1

        dfg = gb.get_group(task)
        # remove any row with zero duration as it confuses the chart library
        dfg = dfg[dfg['duration'] > 0]

        for event in event_list:
            dfe = dfg[dfg.event == event]
            duration_min = min(duration_min, dfe['duration'].min())
            duration_max = max(duration_max, dfe['duration'].max())
            usecs_min = min(usecs_min, dfe['usecs'].min())
            usecs_max = max(usecs_max, dfe['usecs'].max())
            # the legend reports the full count, even if the series is
            # truncated below
            count = len(dfe)
            color, legend_text, cx_sw = legend_map[event]
            if show_legend:
                legend_text = '%s (%d)' % (legend_text, count)
            elif color == GREEN:
                legend_text = '(%d)' % (count)
            else:
                legend_text = None

            # there is bug in bokeh when there are too many circles to draw, nothing is visible
            if count > 50000:
                dfe = dfe[:50000]
                print('Series for %s display truncated to 50000 events' % (event))
            if cx_sw:
                draw_shape = p.circle
                size = get_disc_size(count)
            else:
                draw_shape = p.diamond
                size = get_disc_size(count) + 4
            draw_shape('usecs', 'duration',
                       source=ColumnDataSource(dfe),
                       size=size,
                       color=color,
                       alpha=0.3,
                       legend=legend_text)
        chart_list.append(p)
        show_legend = False

    # all charts use the same ranges so they can be compared side by side
    shared_x_range = Range1d(usecs_min, usecs_max)
    shared_y_range = Range1d(duration_min, duration_max)
    for p in chart_list:
        p.x_range = shared_x_range
        p.y_range = shared_y_range

    # specify how to output the plot(s)
    if len(chart_list) == 1:
        output_html(chart_list[0], prefix, task_re)
    else:
        # split the list into an array of rows with 2 charts per row
        gp = gridplot(split_list(chart_list, 2))
        output_html(gp, prefix, task_re)
def show_kvm_exit_types(df, task_re, label):
    """Draw a stacked bar chart and a table of KVM exit counts per task.

    Only 'kvm_exit' events of the tasks matching task_re are used. The bar
    chart stacks the exit counts of each task by exit reason; the table has
    one row per task, one column per exit reason and a 'TOTAL' column. Both
    are stacked vertically and handed to output_html().

    :param df: trace DataFrame; the columns read here are 'event',
               'task_name', 'next_comm', 'cpu', 'duration', 'next_pid',
               'pid' and 'usecs'
    :param task_re: regular expression applied to the task names with
                    pandas' str.match
    :param label: free text inserted in the chart title
    """
    df = df[df['event'] == 'kvm_exit']
    # private copy: a column is added below and writing into a filtered
    # slice triggers pandas' SettingWithCopyWarning
    df = df[df['task_name'].str.match(task_re)].copy()
    if df.empty:
        # nothing to chart; pandas.concat() below would raise on an empty list
        print('No selection matching "%s"' % (task_re))
        return

    # the next_comm column contains the exit code
    exit_codes = Series(KVM_EXIT_REASONS)
    # add new column containing the exit reason in clear text
    df['exit_reason'] = df['next_comm'].map(exit_codes)
    time_span_msec = get_time_span_msec(df)
    df = df.drop(
        ['cpu', 'duration', 'event', 'next_pid', 'pid', 'next_comm', 'usecs'],
        axis=1)

    # group by task name then exit reasons and count the exits of each type
    df = df.groupby(['task_name', 'exit_reason']).size().to_frame('count')
    df.reset_index(inplace=True)

    p = Bar(df,
            label='task_name',
            values='count',
            stack='exit_reason',
            title="KVM Exit types per task (%s, %d msec window)" %
            (label, time_span_msec),
            legend='top_right',
            tools="resize,hover,save",
            width=1000,
            height=800)
    p._xaxis.axis_label = "Task Name"
    p._xaxis.axis_label_text_font_size = "12pt"
    p._yaxis.axis_label = "Exit Count (sum)"
    p._yaxis.axis_label_text_font_size = "12pt"

    # NOTE: the exit reason is not shown in the tooltip (no way was found to
    # expose it to the hover tool)
    hover = p.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([("task", "$x"), ("count", "@height")])

    # table with counts: build 1 single-column frame per exit reason,
    # indexed by task name
    dfr_list = []
    for reason, dfr in df.groupby('exit_reason'):
        # drop the exit reason column
        dfr = dfr.drop(['exit_reason'], axis=1)
        # rename the count column with the reason name
        dfr = dfr.rename(columns={'count': reason})
        # set the task name as the index
        dfr = dfr.set_index('task_name')
        dfr_list.append(dfr)
    # concatenate all reason columns into 1 dataframe that has the task name as the index
    # counts for missing exit reasons will be set to NaN
    dft = pandas.concat(dfr_list, axis=1)
    dft.fillna(0, inplace=True)
    # Add a total column
    dft['TOTAL'] = dft.sum(axis=1)

    sfmt = StringFormatter(text_align='center', font_style='bold')
    nfmt = NumberFormatter(format='0,0')

    # exit reasons sorted alphabetically with 'TOTAL' at end of list
    col_names = sorted(name for name in dft.columns if name != 'TOTAL')
    col_names.append('TOTAL')

    # convert index to a column; naming the index first guarantees the
    # column is called 'Task' whatever name concat gave to the index
    dft.index.name = 'Task'
    dft.reset_index(inplace=True)
    columns = [
        TableColumn(field=name, title=name, formatter=nfmt)
        for name in col_names
    ]
    columns.insert(0, TableColumn(field='Task', title='Task', formatter=sfmt))
    table = DataTable(source=ColumnDataSource(dft),
                      columns=columns,
                      width=1000,
                      row_headers=False,
                      height='auto')
    output_html(vplot(p, table), 'kvm-types', task_re)