def init_mention_plot():
    """Draw the @-mention bar chart on the global ``mention_barplot``.

    The global ``mention_count`` is refreshed from ``count_mentions()`` and
    iterated as ``(mention, frequency)`` pairs.  Each pair becomes one
    vertical bar; the x axis shows the mention text rotated by 90 degrees
    and the y axis gets ten integer ticks up to the largest frequency.

    Raises:
        ValueError: from ``max(y)`` when ``count_mentions()`` yields no
            pairs (behaviour unchanged from the original).
    """
    global mention_count
    global mention_barplot

    cprint('%s: init @ mentios barcharts...' % TAG, 'yellow', attrs=['bold'])

    mention_count = count_mentions()

    mentions = []
    y = []
    for mention, freq in mention_count:
        mentions.append(mention)
        y.append(freq)
    x = np.arange(len(mentions))

    source = ColumnDataSource(dict(x=x, top=y))
    glyph = VBar(x='x', top='top', bottom=0, width=0.85,
                 fill_color='#ff7f0e')
    mention_barplot.add_glyph(source, glyph)

    # One tick per bar, relabelled with the mention it stands for.
    xaxis = LinearAxis()
    xaxis.ticker = x
    xaxis.major_label_overrides = dict(enumerate(mentions))
    mention_barplot.add_layout(xaxis, 'below')
    mention_barplot.xaxis.major_label_orientation = +np.pi / 2

    yaxis = LinearAxis()
    yaxis.axis_label = 'Overall number of @ mentions'
    yaxis.axis_label_text_font_size = '14pt'
    # Builtin ``int`` replaces the ``np.int`` alias, which was removed in
    # NumPy 1.24.  ``[1:]`` drops the tick at zero.
    yaxis.ticker = np.linspace(0, max(y), 11, dtype=int)[1:]
    mention_barplot.add_layout(yaxis, 'left')

    mention_barplot.add_layout(Grid(dimension=0, ticker=xaxis.ticker))
    mention_barplot.add_layout(Grid(dimension=1, ticker=yaxis.ticker))
def init_user_plot():
    """Draw the tweets-per-user bar chart on the global ``user_barplot``.

    The global ``user_count`` is refreshed from ``count_users()`` and read
    as a mapping of user name to frequency.  Each user becomes one vertical
    bar.  The data source also carries a ``wlist`` column: for every user a
    list starting with ``'@' + user`` followed by up to ten
    ``' prefix:count'`` strings, taken from the global ``user_tweet_freq``
    in descending count order.

    Raises:
        ValueError: from ``max(y)`` when ``count_users()`` returns an empty
            mapping (behaviour unchanged from the original).
    """
    global user_count
    global user_barplot

    cprint('%s: init @ users barcharts...' % TAG, 'yellow', attrs=['bold'])

    user_count = count_users()

    users = []
    y = []
    for user, freq in user_count.items():
        users.append(user)
        y.append(freq)
    x = np.arange(len(users))

    wlist = []
    for user in users:
        top_prefixes = sorted(user_tweet_freq[user].items(),
                              key=lambda kv: kv[1],
                              reverse=True)[:10]
        entry = ['@' + user]
        entry.extend(' %s:%d' % (prefix, freq)
                     for prefix, freq in top_prefixes)
        wlist.append(entry)

    source = ColumnDataSource(dict(x=x, top=y, wlist=wlist))
    glyph = VBar(x='x', top='top', bottom=0, width=0.85,
                 fill_color='#1f77b4')
    user_barplot.add_glyph(source, glyph)

    # One tick per bar, relabelled with the user handle.
    xaxis = LinearAxis()
    xaxis.ticker = x
    xaxis.major_label_overrides = {
        pos: '@' + user for pos, user in enumerate(users)
    }
    user_barplot.add_layout(xaxis, 'below')
    user_barplot.xaxis.major_label_orientation = +np.pi / 2

    yaxis = LinearAxis()
    yaxis.axis_label = 'Overall number of tweets per @'
    yaxis.axis_label_text_font_size = '14pt'
    # Builtin ``int`` replaces the ``np.int`` alias, which was removed in
    # NumPy 1.24.  ``[1:]`` drops the tick at zero.
    yaxis.ticker = np.linspace(0, max(y), 11, dtype=int)[1:]
    user_barplot.add_layout(yaxis, 'left')

    user_barplot.add_layout(Grid(dimension=0, ticker=xaxis.ticker))
    user_barplot.add_layout(Grid(dimension=1, ticker=yaxis.ticker))
def init_plot():
    """Initialise all bar charts and colour the neighbourhoods of the map.

    Steps, in order:

    1. Call ``init_wordcount()``, ``init_user_plot()``,
       ``init_mention_plot()`` and ``count_words(prefix_count, wword_count)``.
    2. Set the colour mapper range and the colour bar ticks from
       ``get_freq_range(prefix_count)``.
    3. For the i-th neighbourhood in ``prefix_count``: take its
       ``config.NUM_WORD_BARS`` most frequent prefixes, push them into
       ``word_sources[i]``, draw them as horizontal bars on
       ``word_barplots[i]`` and recolour the neighbourhood in ``svg``.
    4. Refresh ``svg_div.text`` from ``svg.to_string()``.

    Raises:
        ValueError: from ``max(x)`` when a neighbourhood has no prefixes
            (behaviour unchanged from the original).
    """
    global prefix_count
    global wword_count
    global word_barplots
    global word_sources
    global mapper
    global svg_div

    cprint('%s: init barcharts per neighbourhood...' % TAG, 'yellow',
           attrs=['bold'])
    init_wordcount()

    init_user_plot()
    init_mention_plot()
    count_words(prefix_count, wword_count)

    # update colorbar
    min_freq, max_freq = get_freq_range(prefix_count)
    mapper['transform'].low = min_freq
    mapper['transform'].high = max_freq

    # update colorbar tickers: halve the tick count while it exceeds the
    # largest frequency.
    steps = 13
    while max_freq < steps:
        steps = steps // 2
    # Builtin ``int`` replaces the ``np.int`` alias (removed in NumPy 1.24)
    # here and in every other place below.
    color_bar.ticker = FixedTicker(
        ticks=np.linspace(min_freq, max_freq, steps, dtype=int))

    y = np.arange(config.NUM_WORD_BARS)
    for i, (neigh, wcount) in enumerate(prefix_count.items()):
        wordfreqlist = sorted(wcount.items(),
                              key=lambda kv: kv[1],
                              reverse=True)

        x = []
        prefixes = []
        wlist = []
        for prefix, freq in wordfreqlist[:config.NUM_WORD_BARS]:
            x.append(freq)
            prefixes.append(prefix)
            # Up to five most frequent words recorded for this prefix.
            top_words = sorted(wword_count[neigh][prefix].items(),
                               key=lambda kv: kv[1],
                               reverse=True)[:5]
            wlist.append([' %s:%d' % (k, v) for (k, v) in top_words])

        # The mean of the top five frequencies is min-max scaled together
        # with min_freq and max_freq onto 0..5, then rounded to an index
        # into the six-colour Spectral6 palette.
        color_index = np.round(
            minmax_scale([np.mean(x[:5]), min_freq, max_freq],
                         feature_range=(0, 5))[0])
        map_fill_color = Spectral6[int(color_index)]
        svg.change_fill_color(neigh.replace(' ', ''), map_fill_color)

        barplot = word_barplots[i]
        source = word_sources[i]
        source.data = dict(y=y, right=x, wlist=wlist)

        title = Title()
        title.text = neigh.title()[:13]
        barplot.title = title

        glyph = HBar(y='y', right='right', left=0, height=0.90,
                     fill_color=mapper)
        word_hbarglyphs.append(glyph)
        barplot.add_glyph(source, glyph)

        xaxis = LinearAxis()
        xaxis.ticker = np.linspace(0, max(x), 5, dtype=int)[1:]
        barplot.add_layout(xaxis, 'below')
        barplot.xaxis.major_label_orientation = +np.pi / 2

        yaxis = LinearAxis()
        yaxis.ticker = y
        yaxis.major_label_overrides = {
            pos: prefix for pos, prefix in enumerate(prefixes)
        }
        yaxis.major_label_standoff = -35
        barplot.add_layout(yaxis, 'left')

        barplot.add_layout(Grid(dimension=0, ticker=xaxis.ticker))
        barplot.add_layout(Grid(dimension=1, ticker=yaxis.ticker))

    svg_div.text = svg.to_string()
def parallel_plot(df, axes, color=None, palette=None):
    """Create a parallel coordinate plot from a dataframe.

    Every row of ``df`` becomes one poly-line across ``len(df.columns)``
    vertical axes.  The line data is each column min-max normalised; each
    column additionally gets its own y range and axis showing real values.

    Args:
        df: dataframe to plot; ``shape``, ``columns``, ``min()`` and
            ``max()`` are used.
        axes: mapping from column name to a dict with the keys ``'lower'``
            and ``'upper'`` (axis limits) and optionally ``'choices'`` (a
            sequence of tick labels).  Columns missing from ``axes`` fall
            back to the column's min/max and eight numeric ticks.
        color: per-row values driving the colour of selected lines; must
            offer ``min()``/``max()``.  Defaults to ``np.ones(npts)``.
        palette: palette for the colour mapper.  Defaults to
            ``['#ff0000']``.

    Returns:
        The figure, with the parallel selection and reset tools added.

    Raises:
        ValueError: if the lower or upper limit of any axis is NaN.
    """
    logger = logging.getLogger('cave.plot.parallel.plot')

    npts = df.shape[0]
    ndims = len(df.columns)

    if color is None:
        color = np.ones(npts)
    if palette is None:
        palette = ['#ff0000']

    # NOTE(review): ``high`` receives the minimum and ``low`` the maximum.
    # Kept exactly as in the original; confirm the inversion is intended.
    cmap = LinearColorMapper(high=color.min(),
                             low=color.max(),
                             palette=palette)

    data_source = ColumnDataSource(
        dict(xs=np.arange(ndims)[None, :].repeat(npts, axis=0).tolist(),
             ys=np.array((df - df.min()) / (df.max() - df.min())).tolist(),
             color=color))

    p = figure(x_range=(-1, ndims),
               y_range=(0, 1),
               width=800,
               tools="pan, box_zoom")

    # Create x axis ticks from columns contained in dataframe
    fixed_x_ticks = FixedTicker(ticks=np.arange(ndims), minor_ticks=[])
    formatter_x_ticks = FuncTickFormatter(code="return columns[index]",
                                          args={"columns": df.columns})
    p.xaxis.ticker = fixed_x_ticks
    p.xaxis.formatter = formatter_x_ticks

    p.yaxis.visible = False
    p.y_range.start = 0
    p.y_range.end = 1
    p.y_range.bounds = (-0.1, 1.1)  # add a little padding around y axis
    p.xgrid.visible = False
    p.ygrid.visible = False

    # Create extra y axis for each dataframe column
    tickformatter = BasicTickFormatter(precision=1)
    for index, col in enumerate(df.columns):
        if col in axes:
            axis_spec = axes[col]
            start = axis_spec['lower']
            end = axis_spec['upper']
        else:
            logger.warning(
                "Parallel plot didn't receive information about the axes. "
                "This will likely fail for categorical data")
            # An empty spec keeps the 'choices' look-ups below from raising
            # KeyError for columns that have no entry in ``axes``.
            axis_spec = {}
            start = df[col].min()
            end = df[col].max()

        if np.isnan(start) or np.isnan(end):
            raise ValueError(
                "NaN's not allowed in limits of axes! %s: (%s, %s)" %
                (col, str(start), str(end)))
        logger.debug('Limits for %s are (%s, %s)', col, start, end)

        # Apply the same relative padding to this axis that the main y
        # range has between its start/end and its bounds.
        span = abs(end - start)
        bound_min = start + span * (p.y_range.bounds[0] - p.y_range.start)
        bound_max = end + span * (p.y_range.bounds[1] - p.y_range.end)
        p.extra_y_ranges.update({
            col: Range1d(start=bound_min,
                         end=bound_max,
                         bounds=(bound_min, bound_max))
        })

        has_choices = 'choices' in axis_spec
        num_ticks = len(axis_spec['choices']) if has_choices else 8
        fixedticks = FixedTicker(ticks=np.linspace(start, end, num_ticks),
                                 minor_ticks=[])

        axis = LinearAxis(fixed_location=index,
                          y_range_name=col,
                          ticker=fixedticks,
                          formatter=tickformatter)
        if has_choices:
            # Note, override-dicts need to be created on assign
            # (https://github.com/bokeh/bokeh/issues/8166)
            axis.major_label_overrides = {
                i: v for i, v in enumerate(axis_spec['choices'])
            }
        p.add_layout(axis, 'right')

    # create the data renderer ( MultiLine )
    # specify selected and non selected style
    non_selected_line_style = dict(line_color='grey',
                                   line_width=0.1,
                                   line_alpha=0.5)
    selected_line_style = dict(line_color={
        'field': 'color',
        'transform': cmap
    },
                               line_width=1)

    parallel_renderer = p.multi_line(xs="xs",
                                     ys="ys",
                                     source=data_source,
                                     **non_selected_line_style)

    # Specify selection style
    selected_lines = MultiLine(**selected_line_style)
    # Specify non selection style
    nonselected_lines = MultiLine(**non_selected_line_style)

    parallel_renderer.selection_glyph = selected_lines
    parallel_renderer.nonselection_glyph = nonselected_lines
    p.y_range.start = p.y_range.bounds[0]
    p.y_range.end = p.y_range.bounds[1]

    rect_source = ColumnDataSource({
        'x': [],
        'y': [],
        'width': [],
        'height': []
    })

    # add rectangle selections
    selection_renderer = p.rect(x='x',
                                y='y',
                                width='width',
                                height='height',
                                source=rect_source,
                                fill_alpha=0.7,
                                fill_color='#009933')
    selection_tool = ParallelSelectionTool(
        renderer_select=selection_renderer,
        renderer_data=parallel_renderer,
        box_width=10)

    # custom resets (reset only axes not selections)
    reset_axes = ParallelResetTool()

    # add tools and activate selection ones
    p.add_tools(selection_tool, reset_axes)
    p.toolbar.active_drag = selection_tool
    return p
def timeline_days_hours(interval_frequency, all_co2_dataframe,
                        sensors_with_anomalies, data, upper_bound,
                        destination_path):
    """Plot and save one day-versus-time anomaly timeline per sensor.

    For every sensor a figure is built with the time-of-day slots on the x
    axis and the distinct days of ``all_co2_dataframe`` on the y axis.
    Each anomaly record is drawn as a blue segment from its first to its
    last detail timestamp; a record whose first and last timestamps fall on
    different days is drawn as two segments (up to the last slot of the
    first day, and from the first slot of the last day).  The figure is
    saved to ``destination_path + '/anomalies_timeline_<sensor>.html'``.

    Args:
        interval_frequency: slot length in minutes; it is turned into the
            pandas frequency string ``'<interval_frequency>Min'``.
        all_co2_dataframe: dataframe with a datetime ``'timestamp'`` column.
        sensors_with_anomalies: iterable of sensor names to plot.
        data: nested mapping;
            ``data['anomalies']['anomaly_co2_values'][sensor][0]`` is
            iterated as the list of anomaly records, each holding an
            ``'anomalies_details'`` sequence whose entries start with a
            timestamp string (sliced as ``[0:10]`` for the day and
            ``[11:]`` for the time -- presumably
            ``'YYYY-MM-DD HH:MM:SS'``; confirm against the producer).
        upper_bound: CO2 threshold, only used in the figure title.
        destination_path: directory prefix for the saved HTML files.

    Returns:
        The figure of the last sensor processed, or ``None`` when
        ``sensors_with_anomalies`` is empty.
    """
    times = pd.date_range(start='00:00:00',
                          end='23:55:00',
                          freq=str(interval_frequency) +
                          'Min').strftime('%H:%M:%S')
    timestamp = list(times)

    # dict.fromkeys removes duplicate days while keeping first-seen order.
    days = list(dict.fromkeys(
        all_co2_dataframe['timestamp'].dt.strftime('%Y-%m-%d').to_list()))

    # Label only the first slot of every hour.  For a 5 minute interval
    # this is 0, 12, ..., 276 labelled '00:00' ... '23:00'; deriving it
    # from the slots keeps the labels right for other intervals as well.
    hour_ticks = [
        pos for pos, slot in enumerate(timestamp)
        if pos == 0 or slot[:2] != timestamp[pos - 1][:2]
    ]
    hour_labels = {pos: timestamp[pos][:5] for pos in hour_ticks}

    # Stays None when there is no sensor to plot, so the final return
    # cannot hit an unbound local.
    p = None
    for sensor_name in sensors_with_anomalies:
        anomalies = data['anomalies']['anomaly_co2_values'][sensor_name][0]

        p = figure(plot_height=500,
                   plot_width=2000,
                   x_range=timestamp,
                   y_range=days,
                   title='Timeline of periods with CO2 levels higher than ' +
                   str(upper_bound) + ' ppm in ' + sensor_name +
                   '\n Based on data for last ' + str(len(days)) + ' days',
                   active_drag=None,
                   toolbar_location=None)

        p.x_range.range_padding = 0
        p.y_range.range_padding = 0
        p.title.text_font_size = '15pt'
        p.xaxis.axis_label_text_font_size = "15pt"
        p.yaxis.axis_label_text_font_size = "15pt"
        p.yaxis.major_label_text_font_size = '9pt'
        p.xaxis.major_label_text_font_size = '5pt'

        # Hide the default axis (one label per slot) and add a custom one
        # that is labelled once per hour.
        p.xaxis.visible = False
        xaxis = LinearAxis(ticker=FixedTicker(ticks=list(hour_ticks)))
        xaxis.major_label_orientation = math.pi / 3
        p.add_layout(xaxis, 'below')
        xaxis.major_label_overrides = dict(hour_labels)

        # add anomalies recorded for each day in considered period
        for anomaly in anomalies:
            details = anomaly['anomalies_details']
            first_stamp = details[0][0]
            last_stamp = details[-1][0]
            x = [first_stamp[11:], last_stamp[11:]]
            y = [first_stamp[0:10], last_stamp[0:10]]

            if y[0] == y[1]:
                p.line(x,
                       y,
                       line_width=2,
                       color='blue',
                       legend_label='CO2 above critical value')
            else:
                # The anomaly ends on another day: run to the last slot of
                # the first day, then from the first slot of the last day.
                p.line([x[0], times[-1]], [y[0], y[0]],
                       line_width=2,
                       color='blue')
                p.line([times[0], x[1]], [y[1], y[1]],
                       line_width=2,
                       color='blue')
            # End markers sit at the true start and end in both cases.
            p.circle(x, y, fill_color="blue", line_color='blue', size=5)

        # save graph in output location
        output_file(destination_path + '/anomalies_timeline_' + sensor_name +
                    '.html')
        save(p)

    return p