def plotting(*name):
    """Plot the retention curves stored in the module-level ``plot_df``.

    Two line charts are stacked vertically: one for the ``day1``, ``day7``
    and ``matched`` columns and one for ``single_day_retention``.

    Args:
        *name: Optional. When at least one value is passed, the first one is
            used as the base name of the HTML file the layout is written to
            (``<name>.html``). Without arguments the layout is rendered
            inline in the notebook.

    Returns:
        None. If ``plot_df`` is not available a hint is printed and nothing
        is plotted.
    """
    try:
        df = plot_df.copy()
    except (NameError, AttributeError):
        # plot_df does not exist yet (or is not a DataFrame); the hint below
        # suggests it is produced by interval_rate(). Only these two errors
        # are swallowed so that unrelated failures are not hidden.
        print('run interval_rate() first')
        return

    # Imported lazily so the "no data yet" path works without bokeh.
    from bokeh.io import output_file, output_notebook, show
    from bokeh.charts import Line
    from bokeh.layouts import column

    p = Line(df[['day1', 'day7', 'matched']],
             plot_width=950, plot_height=400, legend='top_right')
    r = Line(df[['single_day_retention']],
             plot_width=950, plot_height=400, color='purple')
    layout = column(p, r)

    # Pick the output target first, then render once.
    if name:
        output_file(name[0] + '.html')
    else:
        output_notebook()
    show(layout)
def fnCreate_Chart_Line(df):
    """Build the "Date wise tweet graph" line chart and return its embed parts.

    Sets the pandas ``html.border`` option to 1 as a side effect, draws ``df``
    as a legend-less bokeh ``Line`` chart without the bokeh logo, and returns
    the ``(script, div)`` pair produced by ``components`` with CDN resources.
    """
    pd.options.html.border = 1

    chart_options = dict(
        title="Date wise tweet graph",
        legend=False,
        xlabel='Date',
        ylabel='Count',
    )
    chart = Line(df, **chart_options)
    chart.logo = None

    embed_script, embed_div = components(chart, CDN)
    return embed_script, embed_div
def fnCreate_Chart_MultiLine(df):
    """Build the "Likes vs retweets" multi-line chart and return its embed parts.

    Sets the pandas ``html.border`` option to 1 as a side effect, draws ``df``
    as a bokeh ``Line`` chart with the legend in the top-left corner and no
    bokeh logo, and returns the ``(script, div)`` pair from ``components``.
    """
    pd.options.html.border = 1

    chart_options = dict(
        title="Likes vs retweets visualization",
        legend="top_left",
        xlabel='Date',
        ylabel='Count',
    )
    chart = Line(df, **chart_options)
    chart.logo = None

    embed_script, embed_div = components(chart, CDN)
    return embed_script, embed_div
def createBokehChart(self):
    """Create the bokeh line chart(s) for the current key/value selection.

    The working DataFrame is sorted by the first key field. With the
    ``lineChartType`` option set to ``"subplots"`` one chart per value field
    is produced; otherwise a single chart holding all value fields is
    produced. The ``clusterby`` option is never applied here: when it is set,
    a warning message is recorded and the charts are built as if it were
    absent.

    Returns:
        A list of bokeh ``Line`` charts.
    """
    key_fields = self.getKeyFields()
    value_fields = self.getValueFields()
    frame = self.getWorkingPandasDataFrame().sort_values(key_fields[0])
    use_subplots = self.options.get("lineChartType", "grouped") == "subplots"
    cluster_by = self.options.get("clusterby")

    # 'Cluster By' only changes which warning is shown, never the charts.
    if cluster_by is not None:
        if use_subplots:
            warning = "Warning: 'Cluster By' ignored when you have multiple Value Fields but subplots options selected"
        elif len(value_fields) > 1:
            warning = "Warning: 'Cluster By' ignored when you have multiple Value Fields but subplots option is not selected"
        else:
            warning = "Warning: 'Cluster By' ignored when grouped option with multiple Value Fields is selected"
        self.addMessage(warning)

    if use_subplots:
        # One chart per value field, sharing a total width of 800.
        return [
            Line(frame,
                 x=key_fields[0],
                 y=field,
                 legend=self.showLegend(),
                 plot_width=int(800 / len(value_fields)))
            for field in value_fields
        ]

    return [
        Line(frame,
             x=key_fields[0],
             y=value_fields,
             color=value_fields,
             legend=self.showLegend())
    ]
def fnCreate_Chart_MultiLine(df, strS, j=1):
    """Build the average-ticket-price line chart and return its embed parts.

    Args:
        df: Data passed straight to the bokeh ``Line`` chart.
        strS: Text appended to the chart title.
        j: When equal to 1 the legend is placed at ``"top_center"``; for any
            other value the chart is created with ``legend=False``.

    Returns:
        The ``(script, div)`` pair produced by ``components`` with CDN
        resources.

    Side effects:
        Sets the pandas ``html.border`` option to 1.
    """
    pd.options.html.border = 1

    legend_position = "top_center" if j == 1 else False
    chart = Line(
        df,
        title="Average of Ticket Price Date wise" + strS,
        legend=legend_position,
        xlabel='Travel Date',
        ylabel='Average Ticket Price',
    )

    # Legend styling is applied regardless of whether a legend was requested.
    chart.legend.label_text_font_size = "7pt"
    chart.legend.orientation = "horizontal"
    chart.legend.click_policy = "hide"
    chart.logo = None

    embed_script, embed_div = components(chart, CDN)
    return embed_script, embed_div
def plot_state(state_data, varaible_names=None):
    """Build a step chart and a line chart of selected state variables.

    Args:
        state_data: Mapping-like object indexed by variable name; each
            selected entry is passed to bokeh as one series.
        varaible_names: Iterable of the variable names to plot. Defaults to
            no variables. (The parameter name keeps its original spelling so
            that keyword callers continue to work.)

    Returns:
        A ``(step_chart, line_chart)`` tuple.

    Raises:
        KeyError (or whatever ``state_data`` raises) when a requested
        variable is missing.
    """
    # None instead of a mutable [] default, which would be shared by all calls.
    if varaible_names is None:
        varaible_names = ()

    data = {v: state_data[v] for v in varaible_names}

    plot = Step(data,
                title="state variables - step graph",
                legend="top_left",
                ylabel='',
                palette=["red", "green", "blue", "orange"])
    plot_line = Line(data,
                     title="state variables - line graph",
                     legend="top_left",
                     ylabel='',
                     palette=["red", "green", "blue", "orange"])
    return (plot, plot_line)
def abc_model(abc_id, model_id, t):
    """Render the model page with one cumulative-weight tab per parameter.

    Args:
        abc_id: Identifier assigned to ``history.id`` before querying.
        model_id: Model whose distribution is requested.
        t: Either the string ``"max"`` (use ``history.max_t``) or a value
            convertible with ``int``.

    Returns:
        The rendered ``model.html`` template.
    """
    history = app.config["HISTORY"]
    history.id = abc_id
    t = history.max_t if t == "max" else int(t)

    df, w = history.get_distribution(model_id, t)
    df["CDF"] = w
    model_ids = history.get_model_probabilities().columns

    parameters = [col for col in df if col != "CDF"]
    tabs = []
    for parameter in parameters:
        # Sort by the parameter, accumulate, then restore the parameter
        # column so only "CDF" holds running totals.
        ordered = df[["CDF", parameter]].sort_values(parameter)
        cumulative = ordered.cumsum()
        cumulative[parameter] = ordered[parameter]
        chart = Line(x=parameter, y="CDF", data=cumulative)
        tabs.append(Panel(child=chart, title=parameter))

    if tabs:
        plot = PlotScriptDiv(*components(Tabs(tabs=tabs)))
    else:
        plot = PlotScriptDiv("", "This model has no Parameters")

    return render_template(
        "model.html",
        abc_id=abc_id,
        model_id=model_id,
        plot=plot,
        BOKEH=BOKEH,
        model_ids=model_ids,
        t=t,
        available_t=list(range(history.max_t + 1)),
    )
def loss_accuracy_plot(df, x, y):
    """Lay out one line chart per entry of ``y`` in a row and package it.

    Missing values in ``df`` are replaced with 0 before plotting. Each entry
    of ``y`` is plotted against ``x`` in its own ``Line`` chart with a legend.

    Returns:
        A ``Plot`` built from the rendered JS resources, CSS resources and
        the ``(script, div)`` pair of the row layout (INLINE resources).
    """
    filled = df.fillna(0)
    charts = [Line(filled, x, column, legend=True) for column in y]
    layout = Row(*charts)

    script, div = components(layout, INLINE)
    js_resources = RESOURCE.render_js()
    css_resources = RESOURCE.render_css()
    return Plot(js_resources, css_resources, script, div)
def line(self, dataframe, x=None, y=None, width=None, height=None,
         groups=None, palette=None, title="Line", xaxis_label=None,
         yaxis_label=None):
    """Create a bokeh ``Line`` chart from ``dataframe``.

    ``palette`` and ``width`` fall back to the entries of
    ``self.__default_options__`` when not given; the final size is whatever
    ``self._width_height(width, height)`` returns. ``groups`` is forwarded as
    the chart's ``color`` argument. Axis labels are only set when a truthy
    label is supplied.

    Returns:
        The created chart.
    """
    if palette is None:
        palette = self.__default_options__.get('palette', None)
    if width is None:
        width = self.__default_options__.get('width', None)
    width, height = self._width_height(width, height)

    chart = Line(dataframe,
                 x=x,
                 y=y,
                 color=groups,
                 width=width,
                 height=height,
                 palette=palette,
                 title=title,
                 legend=True)

    if xaxis_label:
        chart._xaxis.axis_label = xaxis_label
    if yaxis_label:
        chart._yaxis.axis_label = yaxis_label
    return chart
def line(self, dataframe, width=None, height=None, palette=None,
         title="Line", x_axis_label=None, y_axis_label=None, grid=None):
    """Create a bokeh ``Line`` chart from the transpose of ``dataframe``.

    Args:
        dataframe: Source data; it is transposed (``dataframe.T``) before
            being handed to bokeh.
        width: Chart width; defaults to the ``'width'`` option.
        height: Chart height; when falsy, width and height are taken from
            ``self.golden_ratio(width, height)``.
        palette: Colour palette; defaults to the ``'palette'`` option and is
            then resolved through ``self._palette`` for the number of rows
            in ``dataframe``.
        title: Chart title.
        x_axis_label: Label of the x axis.
        y_axis_label: Label of the y axis.
        grid: Optional list-like container. When given, the chart is
            appended to it and the container is returned instead.

    Returns:
        ``grid`` (with the chart appended) when ``grid`` is not None,
        otherwise the chart itself.
    """
    palette = self.get_option('palette') if palette is None else palette
    width = self.get_option('width') if width is None else width
    if not height:
        width, height = self.golden_ratio(width, height)
    palette = self._palette(palette, len(dataframe.index))

    chart = Line(dataframe.T,
                 legend="top_right",
                 color=palette,
                 ylabel=y_axis_label,
                 # Previously y_axis_label was passed here as well, so the
                 # x_axis_label argument was silently ignored.
                 xlabel=x_axis_label,
                 title=title,
                 width=width,
                 height=height)

    if grid is not None:
        grid.append(chart)
        return grid
    return chart
def doPlot11(data, nrDataSource):
    """Build a responsive line chart of ``data`` and return its components.

    The title is ``"Line graph: "`` followed by ``nrDataSource['name']``; the
    axis labels are the names of the first and second columns of ``data``.

    Returns:
        Whatever ``components`` returns for the chart when called with
        ``resources=None, wrap_script=False, wrap_plot_info=True``.
    """
    chart_title = "Line graph: " + nrDataSource['name']
    chart = Line(
        data,
        title=chart_title,
        xlabel=data.columns[0],
        ylabel=data.columns[1],
        responsive=True,
    )
    return components(chart,
                      resources=None,
                      wrap_script=False,
                      wrap_plot_info=True)
def showBacktestingResult(self):
    """Display the backtesting results.

    Writes a textual summary through ``self.output`` and then shows three
    stacked bokeh charts: the equity curve, the drawdown and a histogram of
    per-trade profit/loss.
    """
    d = self.calculateBacktestingResult()

    def report(template, value):
        # Format one summary line and send it to the output channel.
        self.output(template % value)

    # Text summary
    self.output('-' * 30)
    report('First Trade:\t%s', d['timeList'][0])
    report('Last Trade:\t%s', d['timeList'][-1])
    report('Total Trades:\t%s', formatNumber(d['totalResult']))
    report('Total Return:\t%s', formatNumber(d['capital']))
    report('Maximum Drawdown: \t%s', formatNumber(min(d['drawdownList'])))
    report('Ave Trade:\t%s', formatNumber(d['capital'] / d['totalResult']))
    report('Ave Slippage:\t%s',
           formatNumber(d['totalSlippage'] / d['totalResult']))
    report('Ave Commission:\t%s',
           formatNumber(d['totalCommission'] / d['totalResult']))
    report('Win Ratio\t\t%s%%', formatNumber(d['winningRate']))
    report('Ave Win\t%s', formatNumber(d['averageWinning']))
    report('Ave Loss\t%s', formatNumber(d['averageLosing']))
    report('Profit Factor:\t%s', formatNumber(d['profitLossRatio']))

    # Use Bokeh to plot
    from bokeh.charts import Area, Line, Histogram
    from bokeh.layouts import column
    from bokeh.io import show

    plotdata = {
        "TradeN": range(len(d['capitalList'])),
        "Equity Curve": d['capitalList'],
        "Maximum Drawdown": d['drawdownList'],
        "Profit/Loss": d['pnlList'],
    }
    chart_size = dict(width=1000, height=300)

    equity_chart = Line(plotdata, x="TradeN", y="Equity Curve",
                        color="blue", **chart_size)
    drawdown_chart = Area(plotdata, x="TradeN", y="Maximum Drawdown",
                          color="tomato", **chart_size)
    pnl_chart = Histogram(plotdata, values="Profit/Loss", bins=30,
                          color="green", **chart_size)

    show(column(equity_chart, drawdown_chart, pnl_chart))
def create_line(data):
    """Return a ``Line`` chart of ``data`` with this module's standard settings.

    The x axis is the ``'year'`` column and the y series are taken from the
    module-level ``countries``.
    """
    chart_options = dict(
        x='year',
        y=countries,
        legend=True,
        width=1400,
        height=300,
        ylabel='Energy use per capita',
        palette=['purple', 'green', 'blue', 'pink'],
    )
    return Line(data, **chart_options)
def plot_close(tickr):
    """Return a line chart of the daily closing price for May 2017.

    Args:
        tickr: Ticker symbol; the dataset requested is ``'WIKI/' + tickr``.

    Returns:
        A bokeh ``Line`` chart of the ``'Close'`` column.
    """
    import os

    # NOTE(review): an API key is committed in source. It is kept as the
    # fallback for backward compatibility, but it can now be overridden via
    # the QUANDL_API_KEY environment variable; consider rotating the key and
    # removing the literal.
    qd.ApiConfig.api_key = os.environ.get("QUANDL_API_KEY",
                                          "FtpsTU3J-q2x2pa2KBqV")

    tickrfinal = 'WIKI/' + tickr
    mydata = qd.get(tickrfinal, start_date="2017-5-1", end_date="2017-5-31")

    # Column names of the downloaded data. Parenthesised call so the line is
    # valid on both Python 2 and Python 3.
    print(list(mydata))

    y = mydata['Close']
    p = Line(y,
             title="Closing Price/ Day in May",
             width=500,
             height=400,
             xlabel='Dates',
             ylabel='Closing Price')
    return p
def plot_column(data, column_name, save=True, display=True):
    """Plot one column of ``data`` as a line chart.

    Args:
        data: Data passed to the bokeh ``Line`` chart.
        column_name: Column plotted on the y axis; also used for the output
            file name ``plots/<column_name>.html``.
        save: When true, the chart is written to the HTML file above.
        display: When true, the chart is shown (opened) via ``show``.

    Returns:
        The created chart.
    """
    plot = Line(data, y=column_name, plot_width=1200, plot_height=600)

    if save:
        output_file('plots/' + column_name + '.html')

    if display:
        # show() also writes the file configured by output_file().
        show(plot)
    elif save:
        # output_file() only configures the target; without show() nothing
        # was written before, so save explicitly. Imported under an alias
        # because the `save` parameter shadows the function name.
        from bokeh.io import save as save_plot
        save_plot(plot)

    return plot
def doPlot11(data, nrDataSource):
    """Build a responsive log-scale line chart and return its components.

    The first column of ``data`` is used as the x axis (and its name as the x
    label), the name of the second column as the y label. The y axis uses a
    logarithmic mapper and the x axis a fixed tick interval of 5 with 10
    minor ticks.

    Returns:
        Whatever ``components`` returns for the chart when called with
        ``resources=None, wrap_script=False, wrap_plot_info=True``.
    """
    chart_options = dict(
        y_mapper_type="log",
        x=data.columns[0],
        xlabel=data.columns[0],
        ylabel=data.columns[1],
        title="Line graph: " + nrDataSource['name'],
        responsive=True,
    )
    chart = Line(data, **chart_options)
    chart._xaxis.ticker = SingleIntervalTicker(interval=5, num_minor_ticks=10)

    return components(chart,
                      resources=None,
                      wrap_script=False,
                      wrap_plot_info=True)
def trying():
    """Render a 30-day closing-price chart for the ticker in ``app.vars``.

    Downloads the price table from the Quandl WIKI/PRICES endpoint, plots
    ``close`` against ``date`` and annotates the chart with the mean and
    standard deviation of the closing price.

    Returns:
        The rendered ``trying.html`` template with the chart's ``script``
        and ``div``.
    """
    # Take a single timestamp so both ends of the window refer to the same
    # "today" even when the call straddles midnight.
    today = datetime.datetime.now()
    now = today.strftime("%Y%m%d")
    now_30 = (today - datetime.timedelta(days=30)).strftime("%Y%m%d")
    tick = app.vars

    # Let requests build and escape the query string instead of
    # concatenating the ticker value into the URL.
    # NOTE(review): the API key is hard-coded; consider moving it to
    # configuration.
    stock_data = requests.get(
        "https://www.quandl.com/api/v3/datatables/WIKI/PRICES.json",
        params=[
            ("date.gte", now_30),
            ("date.lt", now),
            ("ticker", tick),
            ("api_key", "yvSB52TFjUsFTyZU-n--"),
        ],
    )

    # Parse the response body once.
    datatable = stock_data.json()["datatable"]
    label = [str(entries["name"]) for entries in datatable["columns"]]

    df = pandas.DataFrame(datatable["data"])
    df.columns = label

    # Select the column as a Series so mean()/std() give plain scalars.
    close_mean = "%.2f" % df["close"].mean()
    close_std = "%.2f" % df["close"].std()
    captions = "mean=" + close_mean + ", std=" + close_std

    df2 = df[["date", "close"]]
    li = Line(df2,
              x="date",
              y="close",
              xlabel="date",
              ylabel="closing price",
              legend=False,
              title=tick + " stock chart from " + now_30 + " to " + now)

    caption1 = Label(x=400, y=100, x_units='screen', y_units='screen',
                     text=captions, text_font_size='9pt',
                     text_font_style="bold")
    li.add_layout(caption1)

    script, div = components(li)
    return render_template('trying.html', script=script, div=div)
def HLevelLine():
    """Return a line chart of every column of ``sample`` after the first.

    ``sample`` is read from module scope; the commented-out loader below
    shows where the sample data can be obtained. The ``'Year'`` column is the
    x axis and each remaining column becomes one line. The hover tool shows
    the year and the temperature.
    """
    # sample = pd.read_table('http://kelesidis.de/static/data/sample.txt')

    # Every column after the first is treated as one month series.
    ys = list(sample.columns[1:])

    TOOLS = 'pan,wheel_zoom,hover,crosshair,resize,reset'
    TOOLTIPS = [("Year", "$~x"), ("Temp", "$y")]

    chart_options = dict(
        x='Year',
        y=ys,
        title="Hight Level Bokeh Line Chart",
        legend="bottom_left",
        ylabel='Temp',
        tools=TOOLS,
        width=600,
        height=450,
        responsive=True,
    )
    p = Line(sample, **chart_options)

    # Configure the hover tool that TOOLS asked for.
    hover = p.select(HoverTool)
    hover.tooltips = TOOLTIPS

    p.logo = None
    return p
def main():
    """Render the overview page with the four summary charts.

    When the request carries ``recalculate=True`` the betas are recalculated
    first. The page shows the mean beta per sector, the mean beta per market
    cap decile, a market-cap/beta scatter plot and the beta history of the
    first three tickers.
    """
    # A missing argument yields None, which never equals 'True'.
    if request.args.get('recalculate') == 'True':
        betalyzer.recalculate()

    chart_size = dict(plot_width=550, plot_height=400)
    context = {}

    # Sector betas bar chart
    sector_betas = betalyzer.df_tickers.groupby('sector')['beta'].mean()
    sector_chart = Bar(sector_betas, legend=None, **chart_size)
    (context['bk_sector_betas_script'],
     context['bk_sector_betas_div']) = components(sector_chart)

    # Market cap betas bar chart
    mktcap_betas = betalyzer.df_tickers.groupby(
        'market_cap_decile')['beta'].mean()
    mktcap_chart = Bar(mktcap_betas, legend=None, **chart_size)
    (context['bk_mc_betas_script'],
     context['bk_mc_betas_div']) = components(mktcap_chart)

    # Market cap scatter plot
    scatter_chart = Scatter(betalyzer.df_tickers,
                            x='market_cap_log', y='beta', **chart_size)
    (context['scatter_script'],
     context['scatter_div']) = components(scatter_chart)

    # Line plot for the first three tickers
    top_tickers = betalyzer.df_tickers['ticker'].head(3)
    history_chart = Line(betalyzer.df_betas[top_tickers], **chart_size)
    (context['bk_history_script'],
     context['bk_history_div']) = components(history_chart)

    context['dt_tickers'] = betalyzer.df_tickers.to_dict(orient='records')
    return render_template('main.html', **context)
def get_variable(config):
    """Build the tables and the rank-over-time plot for a name query.

    Args:
        config: Object exposing ``gender``, ``decade`` and ``name``.

    Returns:
        A dict with the HTML tables (``result_table``, ``rank_table``,
        ``birth_table``, ``top_table``), the file name of the saved plot
        (``plot_path``, relative to ``output/``), the resolved ``name`` and
        ``rank``, the original ``config`` and a ``message`` that is non-empty
        when the resolved name differs from the requested one.

    Side effects:
        Writes the plot as a standalone HTML file under ``output/``.
    """
    name, rank, df, decade_df = get_result(config.gender, config.decade,
                                           config.name)

    # `!=` instead of the Python-2-only `<>` operator.
    if config.name != name:
        message = "%s not found. Do you mean %s?" % (config.name, name)
    else:
        message = ""

    birth_table = df.pivot(index='Decade', columns='Name',
                           values='Births').fillna(0).to_html()
    rank_table = df.pivot(index='Decade', columns='Name',
                          values='Rank').fillna(0).to_html()

    # DataFrame.sort() was removed from pandas; sort_values() is the
    # replacement.
    result_table = (df[df['Rank'] == rank][['Decade', 'Name', 'Rank']]
                    .sort_values('Decade')
                    .to_html(index=False))
    top_table = (decade_df[(decade_df['Rank'] <= 8) &
                           (decade_df['Decade'] == config.decade)]
                 .sort_values('Rank')
                 .to_html(index=False))

    from bokeh.charts import Line, save, output_file, ColumnDataSource

    # NOTE(review): config.name ends up in a file path; confirm it is
    # validated upstream if it can come from user input.
    plot_path = "%s_%s_%i.html" % (config.name, config.gender, config.decade)
    output_file("output/" + plot_path, mode='inline')

    tooltips = [(c, '@' + c) for c in df.columns]
    p = Line(df, x='Decade', y='Rank', title="Rank across Time",
             color='Name', xlabel="Decade", ylabel="Rank", tooltips=tooltips)
    p.circle('Decade', 'Rank', color='gray', alpha=0.5,
             source=ColumnDataSource(df))
    save(p)

    return {
        'plot_path': plot_path,
        'result_table': result_table,
        'rank_table': rank_table,
        'birth_table': birth_table,
        'top_table': top_table,
        'rank': rank,
        'config': config,
        'name': name,
        'message': message,
    }
def getPlot(ticker):
    """Return a line chart of the opening price for ``ticker``.

    Args:
        ticker: Ticker symbol of the Quandl WIKI dataset to download. A falsy
            value falls back to ``'GOOG'``.

    Returns:
        A bokeh ``Line`` chart of ``Open`` against ``Date``.
    """
    # The argument used to be overwritten unconditionally with 'GOOG', so
    # every caller got the same chart. 'GOOG' is kept only as the fallback.
    ticker = ticker or 'GOOG'

    # NOTE(review): the API key is hard-coded in the URL; consider moving it
    # to configuration.
    r = requests.get('https://www.quandl.com/api/v3/datasets/WIKI/' + ticker +
                     '/data.json?api_key=Eebg1xGXqxhS11D52xGs')
    asJson = r.json()

    dataset = asJson['dataset_data']
    dataFormated = pd.DataFrame(np.array(dataset['data']),
                                columns=dataset['column_names'])
    # Convert only the two columns that are plotted.
    dataFormated['Date'] = pd.to_datetime(dataFormated.Date)
    dataFormated['Open'] = pd.to_numeric(dataFormated.Open)

    line = Line(dataFormated, x='Date', y='Open', title="Cool")
    return line
def plot_plan_bokeh_2(plan):
    """Plot a plan as a line of operator indices and show it.

    Each step of ``plan`` is an ``(operator, value)`` pair. Operators are
    numbered in order of first appearance (by their ``str`` form) and the
    sequence of those numbers is plotted.

    Args:
        plan: Iterable of 2-item pairs. It is traversed only once, so a
            generator works as well as a list.

    Side effects:
        Writes ``plots_html/line.html`` and opens it via ``show``.
    """
    # Map each operator label to the index of its first appearance; a dict
    # lookup replaces the repeated list scans of the earlier version.
    operator_index = {}
    y = []
    for o, _ in plan:
        s = str(o)
        y.append(operator_index.setdefault(s, len(operator_index)))

    xyvalues = np.array(y)
    line = Line(xyvalues, title="plan", legend="top_left", ylabel='operator')
    output_file('plots_html/line.html')
    show(line)
def ticker(ticker):
    """Render the detail page for one ticker.

    Builds the beta history line chart, a scatter plot of the ticker's
    changes against the market over the first ``betalyzer.window`` rows, and
    a histogram of the first 500 changes of the ticker and ``'SPY'``.
    """
    context = {'ticker': ticker}

    # Beta history
    beta_line = Line(betalyzer.df_betas[ticker],
                     plot_width=1000, plot_height=400)
    context['bokeh_script'], context['bokeh_div'] = components(beta_line)

    # Ticker vs. market scatter
    recent_changes = betalyzer.df_changes.head(betalyzer.window)
    scatter = Scatter(recent_changes, x=betalyzer.market, y=ticker,
                      plot_width=550, plot_height=400)
    context['scatter_script'], context['scatter_div'] = components(scatter)

    # Histogram: reshape to long format (one row per ticker/date pair).
    stacked = betalyzer.df_changes[[ticker, 'SPY']].head(500).unstack()
    df_hist = stacked.reset_index()
    df_hist = df_hist.rename(columns={'level_0': 'ticker', 0: 'change'})
    hist = Histogram(df_hist, values='change', color='ticker', bins=20,
                     legend='top_right', plot_width=550, plot_height=400)
    context['hist_script'], context['hist_div'] = components(hist)

    context['window'] = betalyzer.window
    context['dt_ticker'] = betalyzer.df_tickers.loc[ticker].to_dict()
    return render_template('ticker.html', **context)
def generate_graph():
    """Download the selected price table and store the chart on ``app``.

    The query parameters are taken from ``app.selection``. The resulting
    table is indexed by its ``date`` column and plotted as a line chart; the
    chart's embed ``script`` and ``div`` are stored in ``app.s`` and
    ``app.d``.

    Returns:
        None.
    """
    url = 'https://www.quandl.com/api/v3/datatables/WIKI/PRICES.json?'
    r = requests.get(url, params=app.selection)
    rjson = r.json()['datatable']

    data = pd.DataFrame(rjson['data'])
    cols = pd.DataFrame(rjson['columns'])['name']
    data.columns = cols

    # pd.datetime was removed from pandas; parse the 'YYYY-MM-DD' strings
    # with the vectorised converter instead of a hand-written splitter.
    data['date'] = pd.to_datetime(data['date'], format='%Y-%m-%d')
    data = data.set_index(['date'])

    t = 'Time series data for ' + app.selection['ticker']
    p = Line(data, title=t, xlabel='date', ylabel='price ($)')
    app.s, app.d = components(p)
from collections import OrderedDict
import time

import numpy as np

from bokeh.charts import Line, curdoc, cursession, output_server, show
from bokeh.models import GlyphRenderer

# Sample one sine and one cosine curve over [0, 4*pi].
N = 80
x = np.linspace(0, 4*np.pi, N)
xyvalues = OrderedDict(sin=np.sin(x), cos=np.cos(x))

# Publish the chart to the bokeh server document "line_animate".
output_server("line_animate")
chart = Line(xyvalues, title="Lines", ylabel='measures')
curdoc().add(chart)
show(chart)

# The data source of the first glyph renderer is the one that gets updated.
renderer = chart.select(dict(type=GlyphRenderer))
ds = renderer[0].data_source

# Animate forever: scale both curves from 1 down to -1 and back again,
# pushing the updated data source to the server after every step.
while True:
    for i in np.hstack((np.linspace(1, -1, 100), np.linspace(-1, 1, 100))):
        for k, values in xyvalues.items():
            if k == 'x':
                continue
            ds.data['y_%s' % k] = values * i
        cursession().store_objects(ds)
        time.sleep(0.05)
# Exploratory plotting of `mydata` (defined earlier in the script), first
# with the pandas plotting API and then with bokeh.
mydata.head()
mydata.plot(figsize=(12, 8))
# The same plot is drawn again after importing seaborn.
# NOTE(review): presumably to pick up seaborn's styling - confirm.
import seaborn as sns
mydata.plot(figsize=(12, 8))

## BOKEH
from bokeh.plotting import show
from bokeh.io import output_notebook
from bokeh.charts import Line
# doesn't work in spyder!!
output_notebook()
# High-level chart: plot `mydata` with the legend at the bottom right.
p = Line(mydata, legend='bottom_right')
p.width = 600
p.height = 400
show(p)

# HTML: from here on output goes to a standalone HTML file.
from bokeh.plotting import figure
from bokeh.palettes import RdYlBu11
from bokeh.plotting import show, output_file, reset_output
output_file("bokehplot.html")
mytools = ['pan', 'box_zoom', 'resize', 'wheel_zoom', 'reset']
# Does not allow save and question options
p = figure(width=800, height=600, tools=mytools, x_axis_type='datetime')
# All column names except the last one.
cols = mydata.columns.values[:-1]
def make_plot():
    """Return a line chart of the columns currently ticked in ``chkbx``.

    The active indices of the module-level ``chkbx`` widget select entries of
    ``output_cols``; those columns of the module-level ``df`` are plotted
    against ``'Date'``.
    """
    selected_columns = []
    for index in chkbx.active:
        selected_columns.append(output_cols[index])
    return Line(data=df, x='Date', y=selected_columns, plot_height=400)
import pandas as pd from bokeh.sampledata.degrees import data defaults.width = 500 defaults.height = 300 TOOLS='box_zoom,box_select,hover,crosshair,resize,reset' TOOLTIPS = [ ("y", "$~y"), ("x", "$~x") ] data = data[['Biology', 'Business', 'Computer Science', "Year"]] data = pd.melt(data, id_vars=['Year'], value_vars=['Biology', 'Business', 'Computer Science'], value_name='Count', var_name='Degree') vline = Line(data, y='Count', color='Degree', title="Lines VLine", ylabel='measures', tools=TOOLS) hline = Line(data, y='Count', color='Degree', title="Lines HLine", ylabel='measures', tools=TOOLS) int_vline = Line(data, y='Count', color='Degree', title="Lines VLine Interp", ylabel='measures', tools=TOOLS) int_hline = Line(data, y='Count', color='Degree', title="Lines HLine Interp", ylabel='measures', tools=TOOLS) scatter_point = Scatter(data, x='Year', y='Count', color='Degree', title="Scatter mouse", ylabel='measures', legend=True, tools=TOOLS) scatter = Scatter(data, x='Year', y='Count', color='Degree',
return breath, status, vol df = pd.read_csv(r'c:\Research_data\DH_data\test-840-1.txt', engine = 'python', skiprows = 2, skipfooter = 2, names = ['flow', 'paw', 'other']) df.reset_index(drop = False, inplace = True) df.drop(labels = 'other', axis = 1, inplace = True) df.flow = pd.to_numeric(df.flow, errors = 'coerce') df.paw = pd.to_numeric(df.paw, errors = 'coerce') df['breath'], df['status'], df['vol'] = count_breath(df.flow.values) df.dropna(axis = 0, how = 'all', subset = ['flow'], inplace = True) df.vol = df.groupby('breath')['vol'].cumsum() p = Line(df, x = 'index', y = ['flow', 'paw', 'vol', 'breath', 'status'], color = 'red') p.extra_y_ranges = {'flow': Range1d(start = 0, end = 30)} p.add_layout(LinearAxis(y_range_name = 'flow'), 'left') check_button = CheckboxButtonGroup(labels = ['Double Stacked', 'Flow Limited', 'Ineffective Trigger'], active = [0, 0, 0]) next_button = Button(label = 'Next', type = 'success') next_button.on_click(print('ok')) output_file('test.html') show(vform(p, check_button, next_button))
defaults.height = 300 TOOLS = 'box_zoom,box_select,hover,crosshair,resize,reset' TOOLTIPS = [("y", "$~y"), ("x", "$~x")] data = data[['Biology', 'Business', 'Computer Science', "Year"]] data = pd.melt(data, id_vars=['Year'], value_vars=['Biology', 'Business', 'Computer Science'], value_name='Count', var_name='Degree') vline = Line(data, y='Count', color='Degree', title="Lines VLine", ylabel='measures', tools=TOOLS) hline = Line(data, y='Count', color='Degree', title="Lines HLine", ylabel='measures', tools=TOOLS) int_vline = Line(data, y='Count', color='Degree', title="Lines VLine Interp", ylabel='measures',
from bokeh.models import HoverTool from bokeh.charts import Chart, Step, Line, Area, Scatter, Bar, vplot, hplot, show, output_file from py import path HERE = path.local(__file__).dirpath() xyvalues = pd.read_csv( str(HERE.join("percent-bachelors-degrees-women-usa.csv"))) index = xyvalues.pop("Year") xyvalues = xyvalues[['Biology', 'Business', 'Computer Science']] TOOLS = 'box_zoom,box_select,hover,crosshair,resize,reset' output_file("lines.html", title="line.py example") vline = Line(xyvalues, title="Lines VLine", ylabel='measures', width=500, height=300, tools=TOOLS) hline = Line(xyvalues, title="Lines HLine", ylabel='measures', width=500, height=300, tools=TOOLS) int_vline = Line(xyvalues, title="Lines VLine Interp", ylabel='measures', width=500, height=300, tools=TOOLS) int_hline = Line(xyvalues,
from collections import OrderedDict

import numpy as np
import pandas as pd

from bokeh.charts import Line

# Three series of measurements, one per interpreter.
xyvalues = OrderedDict(
    python=[2, 3, 7, 5, 26, 221, 44, 233, 254, 265, 266, 267, 120, 111],
    pypy=[12, 33, 47, 15, 126, 121, 144, 233, 254, 225, 226, 267, 110, 130],
    jython=[22, 43, 10, 25, 26, 101, 114, 203, 194, 215, 201, 227, 139, 160],
)

# Any of the following (commented-out) conversions is also a valid Line input:
#xyvalues = pd.DataFrame(xyvalues)
#xyvalues = xyvalues.values()
#xyvalues = np.array(xyvalues.values())

line = Line(xyvalues, title="Lines", ylabel='measures', filename="lines.html")

# Same calls as the fluent one-liner, one step at a time.
labelled = line.xlabel('time')
with_legend = labelled.legend("top_left")
with_legend.show()
import numpy as np import pandas as pd from bokeh.models import HoverTool from bokeh.charts import Chart, Step, Line, Area, Scatter, Bar, vplot, hplot, show, output_file from py import path HERE = path.local(__file__).dirpath() xyvalues = pd.read_csv(str(HERE.join("percent-bachelors-degrees-women-usa.csv"))) index = xyvalues.pop("Year") xyvalues = xyvalues[['Biology', 'Business', 'Computer Science']] TOOLS='box_zoom,box_select,hover,crosshair,resize,reset' output_file("lines.html", title="line.py example") vline = Line(xyvalues, title="Lines VLine", ylabel='measures', width=500, height=300, tools=TOOLS) hline = Line(xyvalues, title="Lines HLine", ylabel='measures', width=500, height=300, tools=TOOLS) int_vline = Line(xyvalues, title="Lines VLine Interp", ylabel='measures', width=500, height=300, tools=TOOLS) int_hline = Line(xyvalues, title="Lines HLine Interp", ylabel='measures', width=500, height=300, tools=TOOLS) svalues = {} # svalues['Business'] = [(i, v) for i, v in zip(index, xyvalues['Business'])] for k in xyvalues.columns: svalues[k] = [(i, v) for i, v in zip(index, xyvalues[k])] # # import pdb; pdb.set_trace() scatter_point = Scatter(svalues, title="Scatter mouse", ylabel='measures', width=500, height=300, legend=True, tools=TOOLS) scatter = Scatter(svalues, title="Scatter V Line", ylabel='measures', width=500, height=300,
# Sample dataset: three numeric columns measuring the same quantity plus one
# categorical column.
data = {
    'python': [2, 3, 7, 5, 26, 221, 44, 233, 254, 265, 266, 267, 120, 111],
    'pypy': [12, 33, 47, 15, 126, 121, 144, 233, 254, 225, 226, 267, 110, 130],
    'jython': [22, 43, 10, 25, 26, 101, 114, 203, 194, 215, 201, 227, 139, 160],
    'test': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo',
             'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'bar'],
}
df = pd.DataFrame(data)

# Pretend the rows were sampled once per day starting 2015-01-01.
df['date'] = pd.date_range('1/1/2015', periods=len(df.index), freq='D')

# With no further arguments every numerical column becomes one line.
line = Line(df)

# Explicit y columns, implicit x.
line0 = Line(
    df,
    y=['python', 'pypy', 'jython'],
    title="Interpreters (y=['python', 'pypy', 'jython'])",
    ylabel='Duration',
    legend=True,
)

# Explicit y columns plotted against the date column.
line1 = Line(
    df,
    x='date',
    y=['python', 'pypy', 'jython'],
    title="Interpreters (x='date', y=['python', 'pypy', 'jython'])",
    ylabel='Duration',
    legend=True,
)
def do_single_search(request_form):
    """
    Run a single-term search and render the result page.

    Called from both welcome() and search(). The term is looked up within the
    selected country and language, the hits are broken down by gender, age,
    age-and-gender and NUTS region, and each breakdown is compared with the
    totals for all documents of that country and language.

    :param request_form: mapping with the keys "singleTermQuery" (the search
        term) and "languageAndRegion" (a "<language>:<country>" string)
    :return: the rendered 'single_term_results.html' template, or
        'no_results.html' when the query raises KeyError/HTTPError or the
        renormalisation raises ZeroDivisionError
    """
    search_terms = request_form["singleTermQuery"].lower()
    language_var, country_var = request_form["languageAndRegion"].split(':', 1)

    try:
        specific_query = simple_query_totals({"query": "body_text_ws:%s" % search_terms,
                                              "filter": ["country_s:%s" % country_var, "langid_s:%s" % language_var]})
    except (KeyError, HTTPError):
        return flask.render_template('no_results.html', query=search_terms, available_options=AVAILABLE_OPTIONS,
                                     search_mode='single')

    matches = specific_query['num_docs'].sum()

    #############################
    # GET TOTALS FOR EVERYTHING #
    #############################
    # Same filters as above but matching every document: the background
    # distribution the query is compared against.
    totals = simple_query_totals({"query": "*:*",
                                  "filter": ["country_s:%s" % country_var, "langid_s:%s" % language_var]})
    gender_totals = totals.groupby('gender').num_docs.sum()
    age_totals = totals.groupby('age').num_docs.sum()
    age_totals = sort_and_filter_age(age_totals)
    age_totals_norm = age_totals / age_totals.sum()
    age_and_gender_totals = prepare_age_and_gender(totals)
    # nuts_total = totals.groupby('nuts_3').num_docs.sum()

    ##########
    # GENDER #
    ##########
    gender_specific_query = specific_query.groupby('gender').num_docs.sum()
    # Share of each gender's documents that match, rescaled to sum to 1.
    abs_percentages = gender_specific_query / gender_totals
    try:
        renormalizer = 1.0 / abs_percentages.sum()
    # NOTE(review): if abs_percentages.sum() is a numpy float, dividing by
    # zero presumably yields inf/nan instead of raising - confirm this branch
    # can actually trigger.
    except ZeroDivisionError:
        return flask.render_template('no_results.html', query=search_terms, available_options=AVAILABLE_OPTIONS,
                                     search_mode='single')
    gender_query_adjusted = abs_percentages * renormalizer

    #######
    # AGE #
    #######
    age_specific_query = specific_query.groupby('age').num_docs.sum()
    age_specific_query = sort_and_filter_age(age_specific_query)
    age_specific_query_norm = age_specific_query / age_specific_query.sum()
    # The query curve is smoothed with a rolling mean; the background is not.
    compare_age_df = pd.DataFrame({'background distribution': age_totals_norm,
                                   'query': pd.rolling_mean(age_specific_query_norm, ROLLING_MEAN_FRAME)})
    # Expose the index as a regular column so it can serve as the x axis.
    compare_age_df['i'] = compare_age_df.index

    ##################
    # AGE AND GENDER #
    ##################
    age_and_gender_specific_query = prepare_age_and_gender(specific_query)

    # When the query has no hits for a gender, only the background
    # distribution is plotted for it.
    try:
        # NOTE(review): the male total calls .sum() while the female total
        # below does not - confirm whether the difference is intended.
        age_specific_male_totals = gender_specific_query['M'].sum()
        compare_male_df = pd.DataFrame({'background distribution': age_and_gender_totals['M'],
                                        'query': pd.rolling_mean(age_and_gender_specific_query['M'], ROLLING_MEAN_FRAME)})
    except KeyError:
        age_specific_male_totals = 0
        compare_male_df = pd.DataFrame({'background distribution': age_and_gender_totals['M']})
    compare_male_df['i'] = compare_male_df.index

    try:
        age_specific_female_totals = gender_specific_query['F']
        compare_female_df = pd.DataFrame({'background distribution': age_and_gender_totals['F'],
                                          'query': pd.rolling_mean(age_and_gender_specific_query['F'], ROLLING_MEAN_FRAME)})
    except KeyError:
        age_specific_female_totals = 0
        compare_female_df = pd.DataFrame({'background distribution': age_and_gender_totals['F']})
    compare_female_df['i'] = compare_female_df.index

    ########
    # NUTS #
    ########
    nuts_query = specific_query.groupby('nuts_3').num_docs.sum()
    nuts_total = nuts_query.sum()
    nuts_query_norm = nuts_query / nuts_total
    # Regions whose share of the hits lies above the median share.
    special_regions = nuts_query_norm > nuts_query_norm.median()
    outliers = ', '.join(
        sorted(['%s (%s)' % (NUTS_NAMES[x], x) for x in special_regions.index if special_regions.ix[x].any() == True]))

    # TODO move plotting to its own function
    gender_plot = Bar(gender_query_adjusted,
                      title="Gender distribution",
                      ylabel="percentage",
                      logo=None,
                      toolbar_location="below",
                      # width=300,
                      # height=400,
                      webgl=False)

    age_plot = Line(compare_age_df,
                    x='i',
                    title="Age distribution",
                    x_range=Range1d(start=MIN_AGE, end=MAX_AGE),
                    xlabel='age',
                    ylabel="percentage",
                    logo=None,
                    toolbar_location="below",
                    # width=800,
                    # height=400,
                    legend='top_right',
                    color=['silver', 'red'],
                    webgl=False)

    age_gender_plot_M = Line(compare_male_df,
                             x='i',
                             title="Age distribution for men",
                             xlabel='age',
                             ylabel="percentage",
                             x_range=Range1d(start=MIN_AGE, end=MAX_AGE),
                             logo=None,
                             toolbar_location="below",
                             # width=600,
                             # height=400,
                             legend='top_right',
                             color=['silver', 'green'],
                             webgl=False)

    # NOTE(review): unlike the chart for men, no ylabel is passed here -
    # confirm whether that is intended.
    age_gender_plot_F = Line(compare_female_df,
                             x='i',
                             title="Age distribution for women",
                             xlabel='age',
                             x_range=Range1d(start=MIN_AGE, end=MAX_AGE),
                             logo=None,
                             toolbar_location="below",
                             # width=600,
                             # height=400,
                             legend='top_right',
                             color=['silver', 'blue'],
                             webgl=False)

    # One shared script plus one div per plot, in the order passed in.
    bokeh_script, (gender_plot_div, age_plot_div, age_gender_plot_F_div, age_gender_plot_M_div) = components(
        (gender_plot, age_plot, age_gender_plot_F, age_gender_plot_M))

    return flask.render_template('single_term_results.html',
                                 query=search_terms,
                                 matches=matches,
                                 bokeh_script=bokeh_script,
                                 gender_query_adjusted=gender_query_adjusted,
                                 gender_plot=gender_plot_div,
                                 age_plot=age_plot_div,
                                 age_gender_plot_F=age_gender_plot_F_div,
                                 age_gender_plot_M=age_gender_plot_M_div,
                                 country_code=country_var,
                                 map_views=MAP_VIEWS,
                                 nuts_query=nuts_query_norm.to_json(),
                                 outliers=outliers,
                                 gender_total=gender_specific_query.sum(),
                                 age_total=age_specific_query.sum(),
                                 age_total_M=age_specific_male_totals,
                                 age_total_F=age_specific_female_totals,
                                 nuts_total=nuts_query.sum(),
                                 available_options=AVAILABLE_OPTIONS)
from bokeh.charts import Line, show, output_file

# Sample dataset: three numeric columns measuring the same quantity plus one
# categorical column.
data = {
    'python': [2, 3, 7, 5, 26, 221, 44, 233, 254, 265, 266, 267, 120, 111],
    'pypy': [12, 33, 47, 15, 126, 121, 144, 233, 254, 225, 226, 267, 110, 130],
    'jython': [22, 43, 10, 25, 26, 101, 114, 203, 194, 215, 201, 227, 139, 160],
    'test': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo',
             'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'bar'],
}

# Each measured column gets its own colour and its own dash style.
line = Line(
    data,
    y=['python', 'pypy', 'jython'],
    dash=['python', 'pypy', 'jython'],
    color=['python', 'pypy', 'jython'],
    title="Interpreter Sample Data",
    ylabel='Duration',
    legend=True,
)

output_file("line_single.html", title="line_single.py example")
show(line)
# Pretend the rows were sampled once per day starting 2015-01-01.
df['date'] = pd.date_range('1/1/2015', periods=len(df.index), freq='D')

# With no further arguments every numerical column becomes one line.
line = Line(df)

# Explicit y columns, implicit x.
line0 = Line(
    df,
    y=['python', 'pypy', 'jython'],
    title="Interpreters (y=['python', 'pypy', 'jython'])",
    ylabel='Duration',
    legend=True,
)

# Explicit y columns plotted against the date column.
line1 = Line(
    df,
    x='date',
    y=['python', 'pypy', 'jython'],
    title="Interpreters (x='date', y=['python', 'pypy', 'jython'])",
    ylabel='Duration',
    legend=True,
)

# As line1, with a distinct dash style per column.
line2 = Line(
    df,
    x='date',
    y=['python', 'pypy', 'jython'],
    dash=['python', 'pypy', 'jython'],
    title="Interpreters (x='date', y, dash=['python', 'pypy', 'jython'])",
    ylabel='Duration',
    legend=True,
)
line2.title_text_font_size = '11pt'

# As line2, with a distinct colour per column as well.
line3 = Line(
    df,
    x='date',
    y=['python', 'pypy', 'jython'],
    dash=['python', 'pypy', 'jython'],
    color=['python', 'pypy', 'jython'],
    title="Interpreters (x='date', y, dash, color=['python', 'pypy', 'jython'])",
    ylabel='Duration',
    legend=True,
)
line3.title_text_font_size = '11pt'

# Colour per column, dash style driven by the 'test' column, plus tooltips.
line4 = Line(
    df,
    x='date',
    y=['python', 'pypy', 'jython'],
    dash='test',
    color=['python', 'pypy', 'jython'],
    title="Interpreters (x='date', y, color=['python', 'pypy', 'jython'], dash='test') with tooltips",
    ylabel='Duration',
    legend=True,
    tooltips=[('series', '@series'), ('test', '@test')],
)