def ratioPlot(self):
    """Show a scatter plot of the men/women ratio analysis with hover tooltips.

    The data comes from ``nds.ratioAnalysis(self.menStats, self.womenStats)``;
    its ``ratioData`` mapping supplies the ``x``, ``y``, ``desc`` and ``ratio``
    columns. The finished figure is handed to ``show``.
    """
    ratioData = nds.ratioAnalysis(self.menStats, self.womenStats).ratioData

    hover = HoverTool(
        tooltips=[
            ("ratio", "@ratio"),
            ("", "@desc"),
        ]
    )

    # The hover tool is attached to the figure only; it is not a glyph
    # property, so it must not be repeated in the scatter() call below.
    plot = figure(plot_width=450, plot_height=450, tools=[hover])

    ratioSource = ColumnDataSource(
        data=dict(
            x=ratioData['x'],
            y=ratioData['y'],
            desc=ratioData['desc'],
            ratio=ratioData['ratio'],
        )
    )

    # NOTE(review): the colour list is a literal, not a column of ratioSource;
    # presumably ratioData always holds exactly eight points -- confirm.
    plot.scatter(
        'x', 'y', size=10,
        fill_color=[
            '#F4C2C2', '#FF0000', '#D73B3E', '#800000',
            '#98FB98', '#03C03C', '#008000', '#006400',
        ],
        source=ratioSource,
        line_color=None,
    )

    show(plot)
def exibir(self):
    """Render the three histograms built from ``self.dados`` side by side.

    Nothing is drawn (the method returns ``None`` immediately) as soon as one
    of the series stored in ``self.dados`` is found to be empty.
    """
    for serie in self.dados.values():
        if len(serie) == 0:
            return

    defaults.width, defaults.height = 400, 300

    # One histogram per series: score, schedule size, course popularity.
    hist_pontuacao = Histogram(
        self.dados["pontos"],
        title="Grades por pontuação",
        xlabel="Pontuação",
        ylabel="Número de grades",
        responsive=True,
        bins=30,
    )
    hist_tamanho = Histogram(
        self.dados["tamanhos"],
        title="Grades por quantidade de disciplinas",
        xlabel="Número de disciplinas",
        ylabel="Número de grades",
        responsive=True,
        bins=8,
    )
    hist_pop = Histogram(
        self.dados["popularidade"],
        title="Ocorrências da disciplina x",
        xlabel="Disciplina",
        ylabel="Ocorrências nas grades",
        responsive=True,
        bins=46,
    )

    output_file("html/histograms.html")
    linha = hplot(hist_pontuacao, hist_tamanho, hist_pop)
    show(linha)
def plot_histogram(same, diff):
    """Write and show a density histogram comparing two value series.

    ``same`` is plotted under the label "PAINS vs. PAINS" and ``diff`` under
    "PAINS vs. ChEMBL"; the x axis is fixed to the range 0..1.
    """
    output_file("histogram_pains_self_diff.html")

    series = {"PAINS vs. PAINS": same, "PAINS vs. ChEMBL": diff}
    chart = Histogram(series, bins=20, density=True, legend=True)
    chart.x_range = Range1d(0, 1)

    show(chart)
def shows(self):
    """Build one Bar chart per configured value key and show them stacked.

    Chart ``i`` uses the i-th entry of the instance's data, value-key,
    label-key and title sequences; all charts share the same legend setting.
    """
    stacked = [
        Bar(
            self.__data[idx],
            values=self.__vkey[idx],
            label=self.__lkey[idx],
            agg='sum',
            title=self.__title[idx],
            legend=self.__legend,
            width=1000,
        )
        for idx in range(len(self.__vkey))
    ]
    show(vplot(*stacked))
def school_types_bar_chart():
    """Plot, per establishment type, how many schools appear in 2015 and 2016.

    Schools are loaded with ``get_schools`` from the two URN lists, counted by
    ``typeOfEstablishment`` label and competition year (``comp``), and shown
    as a grouped bar chart written to ``visuals/establisment_types.html``.
    """
    schools_2016 = get_schools('urn/2016.urns.list', 2016)
    schools_2015 = get_schools('urn/2015.urns.list', 2015)

    # est_types[label] -> {2015: count, 2016: count}
    est_types = {}

    def _update_types(schools_dict):
        for school in schools_dict:
            label = school.data['typeOfEstablishment']['label']
            year = school.data['comp']
            counts = est_types.setdefault(label, {2015: 0, 2016: 0})
            counts[year] += 1

    _update_types(schools_2015)
    _update_types(schools_2016)

    # Snapshot once so categories and both series share the same ordering.
    pairs = list(est_types.iteritems())
    x_categories = [label for label, _ in pairs]
    y_data = {
        'SR2015': [counts[2015] for _, counts in pairs],
        'SR2016': [counts[2016] for _, counts in pairs],
    }

    output_file('visuals/establisment_types.html')
    bar = Bar(
        y_data,
        cat=x_categories,
        title='Establishment Types and count',
        xlabel='Type of Establishment',
        ylabel='Count',
        width=1000,
        height=600,
        legend=True,
    )
    show(bar)
def dotplot(pname, filename, data, title, ylabel):
    """Write and show a Dot chart of the ``data`` mapping.

    The keys of ``data`` become the categories and its values the dots. The
    chart title is ``"<pname> : <title>"`` and the output file is
    ``"<filename>_<title>.html"``.
    """
    output_file(filename + "_" + title + ".html")

    names = list(data.keys())
    values = list(data.values())

    chart = Dot(
        values,
        cat=names,
        title=pname + " : " + title,
        ylabel=ylabel,
        legend=False,
    )
    show(chart)
def ts_vis_auto(steps,path):
    """Forecast the series stored at ``path`` and plot past vs. predicted values.

    The file is parsed with ``parse_csv``; the forecast comes from
    ``holt_pred`` when the series holds at most 30 zeros and from ``sarima``
    otherwise. Any exception raised anywhere in the body is caught and
    reported as "no content in file".

    NOTE(review): this block was recovered from a single collapsed line, so
    the extent of the ``if slope >= 0:`` body below is an assumption (the
    plotting code is taken to run only for non-negative slopes) -- confirm
    against the original file.
    """
    try:
        index_name,my_trend = parse_csv(path)
        # Choose the forecasting helper from the number of zero observations.
        if my_trend.count(0) <= 30:
            index_name,index_name_future,my_trend,future = holt_pred(steps,path)
        else:
            index_name,index_name_future,my_trend,future = sarima(steps,path)
        # Slope of a degree-1 least-squares fit over the last 156 points.
        slope = np.polyfit(range(len(my_trend[-156:])),my_trend[-156:],1)[0]
        if slope >= 0:
            print 'This is a upward trending with slope: ', slope
            # Output name: basename of `path` with its last four characters
            # (presumably the ".csv" extension) replaced by ".html".
            html_path = '/Users/royyang/Desktop/trending_project/html/'+path.split('/')[-1][:-4]+'.html'
            # html_path = 'example.html'
            output_file(html_path, title="bohek example")
            # Observed values; Time1/Intensity1 feed the hover tooltips.
            source1 = ColumnDataSource(
                data=dict(
                    x1=index_name,
                    y1=my_trend,
                    Time1=[str(var).split()[0] for var in index_name],
                    Intensity1=my_trend
                )
            )
            # Forecast values, rounded for display in the tooltips.
            source2 = ColumnDataSource(
                data=dict(
                    x2=index_name_future,
                    y2=future,
                    Time1=[str(var).split()[0] for var in index_name_future],
                    Intensity1=[np.round(var,0) for var in future]
                )
            )
            TOOLS = "pan,wheel_zoom,box_zoom,reset,save,hover"
            p = figure(x_axis_type="datetime",plot_width=1000, plot_height=600, tools=TOOLS)
            p.line('x1','y1', color='red',legend='Past',source=source1)
            p.circle('x1','y1',size = 5,color = 'red',source=source1)
            p.line('x2','y2', color='blue', legend='Future',source=source2)
            p.circle('x2','y2',size = 8,color = 'blue',source=source2)
            p.xaxis.axis_label="Time"
            p.yaxis.axis_label="Search Intensity"
            p.title = "Search Prediction of "+path.split('/')[-1].split('.')[0]
            p.background_fill= "#cccccc"
            p.grid.grid_line_color="white"
            p.legend.label_standoff = 20
            p.legend.glyph_width = 50
            p.legend.legend_spacing = 15
            p.legend.legend_padding = 1
            p.legend.orientation = "top_left"
            # The "hover" entry in TOOLS created the HoverTool configured here.
            hover = p.select(dict(type=HoverTool))
            hover.tooltips = OrderedDict([
                ('Time', '@Time1'),
                ('Intensity', '@Intensity1'),
            ])
            # save(p)
            show(p)
    # NOTE(review): this catches every Exception (parsing, forecasting and
    # plotting alike) and always prints the same message; `err` is unused.
    except Exception as err:
        print 'There is no content in file: '+path
def _chart(self, chartcls, **kwargs):
    """Instantiate ``chartcls`` on ``self.frame`` and show or save it.

    Options are layered in this order: built-in defaults, then
    ``self.kwargs()``, then the caller's ``kwargs``. The special keyword
    ``show`` (default ``True``) is consumed here: a truthy value displays the
    chart via ``charts.show``, a falsy one writes it via ``charts.save``.

    Returns the created chart.
    """
    display = kwargs.pop('show', True)

    opts = dict(width=1000, height=500, legend='bottom_left')
    for overrides in (self.kwargs(), kwargs):
        opts.update(overrides)

    p = chartcls(self.frame, **opts)
    emit = charts.show if display else charts.save
    emit(p)
    return p
def getDistrib():
    """Serve the distribution form (GET) or build the borough box plot (POST).

    On POST the selected feature and year are read from the form, one query
    per borough is built with ``buildQuery`` and loaded with
    ``pd.read_json``, the results are concatenated into a single frame and
    drawn as a box plot grouped by borough. The plot is written to
    ``templates/results.html`` and the client is redirected to
    ``/goDistrib``.
    """
    if request.method == 'GET':
        return render_template('distrib.html')

    # [year, JSON endpoint] pairs for each borough.
    bronx = [
        [2009, 'https://data.cityofnewyork.us/resource/en2c-j6tw.json'],
        [2010, 'https://data.cityofnewyork.us/resource/n2s5-fumm.json'],
        [2011, 'https://data.cityofnewyork.us/resource/bawj-6bgn.json'],
        [2012, 'https://data.cityofnewyork.us/resource/3qfc-4tta.json'],
    ]
    brooklyn = [
        [2009, 'https://data.cityofnewyork.us/resource/rmv8-86p4.json'],
        [2010, 'https://data.cityofnewyork.us/resource/w6yt-hctp.json'],
        [2011, 'https://data.cityofnewyork.us/resource/5mw2-hzqx.json'],
        [2012, 'https://data.cityofnewyork.us/resource/bss9-579f.json'],
    ]
    manhattan = [
        [2009, 'https://data.cityofnewyork.us/resource/956m-xy24.json'],
        [2010, 'https://data.cityofnewyork.us/resource/ad4c-mphb.json'],
        [2011, 'https://data.cityofnewyork.us/resource/ikqj-pyhc.json'],
        [2012, 'https://data.cityofnewyork.us/resource/dvzp-h4k9.json'],
    ]
    queens = [
        [2009, 'https://data.cityofnewyork.us/resource/m59i-mqex.json'],
        [2010, 'https://data.cityofnewyork.us/resource/crbs-vur7.json'],
        [2011, 'https://data.cityofnewyork.us/resource/s3zn-tf7c.json'],
        [2012, 'https://data.cityofnewyork.us/resource/jcih-dj9q.json'],
    ]
    statenIsland = [
        [2009, 'https://data.cityofnewyork.us/resource/cyfw-hfqk.json'],
        [2010, 'https://data.cityofnewyork.us/resource/wv4q-e75v.json'],
        [2011, 'https://data.cityofnewyork.us/resource/a5qt-5jpu.json'],
        [2012, 'https://data.cityofnewyork.us/resource/tkdy-59zg.json'],
    ]
    # [database column, display name] pairs used to translate the form value.
    featureNames = [
        ['comparable_rental_2_market_value_per_sqft', 'Market value per square foot'],
        ['comparable_rental_2_full_market_value', 'Full market value'],
        ['comparable_rental_2_year_built', 'Year Built'],
        ['comparable_rental_2_gross_income_per_sqft', 'Gross income per square foot'],
    ]

    # The request was a POST: read the variables from the form.
    # All boroughs are selected by default.
    app_xplor.vars['feat'] = request.form['feat']
    app_xplor.vars['year'] = request.form['year']

    # Translate the display name of the feature into the database column name.
    dbFeatureName = convertField(app_xplor.vars['feat'], featureNames)

    boroughs = [
        ('Bronx', bronx),
        ('Brooklyn', brooklyn),
        ('Manhattan', manhattan),
        ('Queens', queens),
        ('Staten Island', statenIsland),
    ]

    # Build every query first, then execute them, in borough order.
    queries = [
        buildQuery(int(app_xplor.vars['year']), table, dbFeatureName)
        for _, table in boroughs
    ]
    frames = [pd.read_json(query) for query in queries]

    # Tag each result with its borough so the box plot can group on it.
    for (label, _), frame in zip(boroughs, frames):
        frame['Borough'] = label

    cleanData = pd.concat(frames).dropna()
    cleanData.columns = [app_xplor.vars['feat'], 'Borough']

    # Plot
    defaults.width = 450
    defaults.height = 350
    box_plot = BoxPlot(cleanData, label='Borough', title=str(app_xplor.vars['year']))
    output_file("templates/results.html")
    show(vplot(hplot(box_plot)))
    return redirect('/goDistrib')
def plot(self, pct=False, output_file_path='temp_plot.html', title="", legend=True):
    """
    Plot the timeseries data held in self.content with Bokeh.

    Parameters
    ----------
    pct: bool
        When true, show the percent-change chart instead of the raw series.
    output_file_path: str
        Path, including the file name, of the static HTML output.
    title: str
        Title of the graph and of the HTML page.
    legend: bool
        Whether to include the legend.
    """
    # Output to static HTML file
    output_file(output_file_path, title=title)

    # Spectral palette matching the column count (3..10, capped at 11).
    # It is currently unused: the palette argument of TimeSeries is disabled.
    no_cols_needed = len(self.content.columns)
    if no_cols_needed >= 3:
        Spectral = getattr(bokeh.palettes, "Spectral%d" % min(no_cols_needed, 11))

    def _prepare(frame):
        # Bokeh stumbles if the series starts with a nan
        # (hopefully fixed in Bokeh 0.9.4), so the first row is zero-filled
        # after the dropna(thresh=<number of columns>, axis=1) pass.
        frame = frame.dropna(thresh=len(frame.columns), axis=1)
        frame.iloc[0] = frame.iloc[0].fillna(0)
        return frame

    # The raw-series chart is always built, even when only p2 is shown.
    p = TimeSeries(_prepare(self.content), legend=legend, title=title,
                   width=800, height=350)
    if not pct:
        show(p)
        return

    p2 = TimeSeries(_prepare(self.content.pct_change()), legend=legend,
                    title="Percent Change", width=800, height=350)
    show(p2)
def export(): nonlocal data import bokeh.charts as bch from bokeh.layouts import column # from bokeh.models import HoverTool, GlyphRenderer bch.output_file("Chart.html") data = data.iloc[1:, :] # TOOLS = "pan, wheel_zoom, box_zoom, crosshair, resize, reset "# , hover" title = "History (total result: {0:.2f} €)".format(result) if bonus > 0: title = title[:-1] + ", excluding Bonus: {0:.2f} €)".format(bonus) cols = ["Paid-in", "Balance", "Normalised", "Total Result"] if bonus > 0: cols = ["Paid-in", "Balance", "Normalised", "Total Result incl Bonus", "Total Result"] tsline = bch.TimeSeries(data, x="Time", y=cols, title=title, # tools=TOOLS, ylabel='Euro', legend=True, width=1250, height=550) """ from bokeh.models import HoverTool hover = HoverTool( tooltips=[ ("index", "$index"), ("(x,y)", "($x, $y)"), ("desc", "$balance"), # ("test", data.iloc["$index", 4]) ] ) tsline.add_tools(hover) """ if open_browser: bch.show(column(tsline)) else: bch.save(column(tsline)) import matplotlib.pyplot as plt import matplotlib matplotlib.style.use('ggplot') data.plot(x="Time", y=cols) plt.savefig("Chart.pdf")
def __init__(self, local_ip='n/a', public_ip='n/a', arp=None):
    """Build and show the VLAN heat map together with an IP/ARP table.

    Parameters
    ----------
    local_ip, public_ip : str
        Addresses listed in the first two rows of the table.
    arp : iterable of pairs, optional
        Extra table rows; for each entry, element 1 is shown in the "Source"
        column and element 0 in the "IP Address" column, both stripped of
        surrounding whitespace and parentheses. Defaults to no extra rows.

    Raises
    ------
    RuntimeError
        If pandas is not installed.
    """
    # A fresh list per call instead of a shared mutable default argument.
    if arp is None:
        arp = []

    # Set palette of colors for n vlans
    palette = [
        "#004529", "#006837", "#238443", "#41ab5d", "#78c679", "#addd8e",
        "#d9f0a3", "#f7fcb9", "#ffffe5", "#084081", "#0868ac", "#2b8cbe",
        "#4eb3d3", "#7bccc4", "#a8ddb5", "#ccebc5", "#e0f3db", "#f7fcf0",
        "#4d004b", "#810f7c", "#88419d", "#8c6bb1", "#EFEFEF", "#CFCFCF",
        "#5F5F5F", "#000000", "#8c96c6", "#9ebcda", "#bfd3e6", "#e0ecf4",
        "#f7fcfd", "#fff7fb", "#ece2f0", "#d0d1e6", "#a6bddb", "#67a9cf",
        "#3690c0", "#02818a", "#016c59", "#014636", "#67001f", "#980043",
        "#ce1256", "#e7298a", "#df65b0", "#c994c7", "#d4b9da", "#e7e1ef",
        "#f7f4f9", "#fff7ec", "#fee8c8", "#fdd49e", "#fdbb84", "#fc8d59",
        "#ef6548", "#d7301f", "#b30000", "#7f0000",
    ]

    try:
        import pandas as pd
    except ImportError:
        raise RuntimeError("Data requires pandas (http://pandas.pydata.org) to be installed")

    data = pd.read_csv(join(dirname(__file__), "vlan.csv"))

    # Drop the last CSV column, index the rows by the first column (as
    # strings) and transpose.
    df = data[data.columns[:-1]]
    df2 = df.set_index(df[df.columns[0]].astype(str))
    df2.drop(df.columns[0], axis=1, inplace=True)
    df3 = df2.transpose()

    output_file("output/switch.html", title='vlan map')

    # Make heat map
    hm = HeatMap(df3, title="VLANs", width=950, palette=palette)

    # Make IP/ARP table: the two fixed rows first, then one row per ARP entry.
    x = ['Local', 'Public']
    y = [local_ip, public_ip]
    for entry in arp:
        x.append(entry[1].strip().strip("()"))
        y.append(entry[0].strip().strip("()"))

    source = ColumnDataSource(dict(sourceCol=x, ipCol=y))
    columns = [
        TableColumn(field="sourceCol", title="Source"),
        TableColumn(field="ipCol", title="IP Address"),
    ]
    data_table = DataTable(source=source, columns=columns, width=650, height=450)

    p = vform(hm, data_table)
    show(p)
def plot_accuracies_bokeh(accuracies, proteins, title, directory='C:\\uday\\gmu\\ngrams\\july_2016_results\\', ext='html'):
    """Write and show a grouped bar chart of per-protein accuracies.

    ``proteins`` and ``accuracies`` are paired positionally; each protein's
    accuracy entry is plotted against the methods RF, SVM, KNN and GNB.
    The chart is saved as ``<directory>/<title>.<ext>``; the directory is
    created when it does not exist yet.
    """
    if not os.path.exists(directory):
        os.makedirs(directory)
    output_file(os.path.join(directory, "%s.%s" % (title, ext)))

    methods = ['RF', 'SVM', 'KNN', 'GNB']
    # Later duplicates of a protein name override earlier ones.
    accuracies_dict = dict(zip(proteins, accuracies))

    bar = Bar(
        SortedDict(accuracies_dict),
        methods,
        title=title,
        stacked=False,
        legend='top_right',
        ylabel="accuracy",
        tools="pan,wheel_zoom,box_zoom,reset,save",
    )
    show(bar)
def aggregate(transaction_history):
    """Sum cash flows per quarter and show them as two bar charts.

    Every transaction adds its ``cashflow`` to the ``net`` total of its
    quarter (``"<year>-Q<n>"``) and, depending on ``kind``, to one of the
    buy / sell / interest / dividend / historical totals (HISTORICAL and
    CURRENT share the last one). Gaps are filled with
    ``addMissingQuarters``. The stacked per-kind chart and the net chart are
    written to ``test.html``.
    """
    names = ('net', 'buy', 'sell', 'interest', 'dividend', 'historical')
    totals = dict((name, dict()) for name in names)

    # First matching entry wins, mirroring an if/elif chain.
    kind_to_bucket = (
        ((Transaction.BUY,), 'buy'),
        ((Transaction.SELL,), 'sell'),
        ((Transaction.INTEREST,), 'interest'),
        ((Transaction.DIVIDEND,), 'dividend'),
        ((Transaction.HISTORICAL, Transaction.CURRENT), 'historical'),
    )

    def _accumulate(bucket, quarter, cashflow):
        bucket[quarter] = float(handleNull(bucket.get(quarter))) + float(cashflow)

    for t in transaction_history:
        quarter = "%s-Q%i" % (t.date.year, (t.date.month-1)//3+1)
        _accumulate(totals['net'], quarter, t.cashflow)
        for kinds, name in kind_to_bucket:
            if t.kind in kinds:
                _accumulate(totals[name], quarter, t.cashflow)
                break

    d = {}
    for name in names:
        d[name] = pd.Series(addMissingQuarters(totals[name]))

    df = pd.DataFrame(d)
    df['label'] = df.index

    p1 = Bar(
        df,
        values=blend('buy', 'sell', 'interest', 'dividend', 'historical',
                     name='cashflow', labels_name='cf'),
        label=cat(columns='label', sort=False),
        stack=cat(columns='cf', sort=False),
    )
    p2 = Bar(df, values=blend('net'), label='label')

    output_file("test.html")
    show(vplot(p1, p2))
def itemBarPlot(self):
    """Show a grouped bar chart comparing item counts for men and women.

    Bar heights come from the values of ``self.menPlot.itemZip`` and
    ``self.womenPlot.itemZip``; the categories are the keys of the men's
    mapping.
    """
    # The unresolved merge conflict on the title was settled in favour of the
    # HEAD side ("Items"), which also matches the x-axis label.
    itemBar = Bar(
        {
            'men': self.menPlot.itemZip.values(),
            'women': self.womenPlot.itemZip.values(),
        },
        cat=self.menPlot.itemZip.keys(),
        legend=True, title="Items",
        width=900, height=450,
        xlabel='Items', ylabel="Count",
        tools=None
    )
    show(itemBar)
def kmeanspp(X, k, num_iter=100):
    """Cluster ``X`` into ``k`` groups and show the result as a scatter plot.

    Centroids start from ``init_centroids`` and are refined for ``num_iter``
    rounds of assign / recalculate. The final assignment (element 1 of each
    entry returned by ``assign_to_clusters``) colours the points.
    """
    centroids = init_centroids(X, k)
    for _ in range(num_iter):
        D = assign_to_clusters(X, centroids)
        centroids = recalc_centroids(X, D)

    points = pd.DataFrame(X)
    labels = pd.Series([entry[1] for entry in D])
    frame = pd.concat([points, labels], axis=1)
    frame.columns = ['x', 'y', 'cluster']

    show(Scatter(frame, x='x', y='y', color='cluster'))
def show_histogram(values, title, filename):
    """Write and show a 5-bin histogram of ``values``.

    Parameters
    ----------
    values : sequence
        The ratings to plot; they become the single ``stars`` column.
    title : str
        Title of both the chart and the HTML page.
    filename : str
        File name of the output, created under ``../results/``.
    """
    output_file('../results/{0}'.format(filename), title=title)

    # The chart is built straight from the frame; the former
    # DataFrame -> dict round-trip produced a value that was never used.
    df = pandas.DataFrame(OrderedDict(stars=values))

    hist = Histogram(df, bins=5, legend=True,
                     title=title,
                     ylabel="Frequency",
                     xlabel="Ratings",
                     width=800, height=800)
    show(hist)
def plot_PRC(y_test, predicted):
    """
    Show the precision-recall curve of the model.
    input: true label vector of the data, predicted value vector
    """
    # Precision & recall on the test set; the thresholds are not plotted.
    precision, recall, _ = precision_recall_curve(y_test, predicted)

    chart = figure(title="Model Metrics (PRC)", plot_width=600, plot_height=600)
    chart.line(x=recall, y=precision, color="firebrick", line_width=4)
    chart.xaxis.axis_label = "Recall"
    chart.yaxis.axis_label = "Precision"
    show(chart)
def plot_ROC(y_test, predicted):
    """
    Show the ROC curve of the model.
    input: true label vector of the data, predicted value vector
    """
    # False/true positive rates on the test set; thresholds are not plotted.
    fpr, tpr, _ = roc_curve(y_test, predicted)

    chart = figure(title="Model Metrics (ROC)", plot_width=600, plot_height=600)
    chart.line(x=fpr, y=tpr, color="navy", line_width=4)
    chart.xaxis.axis_label = "False Positive Rate"
    chart.yaxis.axis_label = "True Positive Rate"
    show(chart)
def plot_box(df, component):
    """Show a box plot of the x, y and theta metrics of one component.

    Rows of ``df`` whose ``component`` column equals ``component`` are split
    by ``metric`` ('x', 'y', 'theta'), each part is passed through
    ``lineup`` and the results are concatenated. Boxes are grouped by ``SN``
    and ``metric`` and coloured by ``SN``; the component is the chart title.
    """
    dfComp = df[df['component'] == component]

    frames = [
        lineup(dfComp[dfComp['metric'] == metric], metric)
        for metric in ('x', 'y', 'theta')
    ]
    xytDf = pd.concat(frames)

    # The title previously referenced the undefined name `componentb`.
    p = BoxPlot(xytDf, values='value', label=['SN', 'metric'], color='SN',
                legend=None, title=component)
    show(p)
def plot_size_count(dir,title): data = read_csv() data_size = map(lambda x: x/1024, data.SIZE.tolist()) data_dir = data.D.tolist() length = len(data_size) data_to_show = [] for i in range(0,length): if(data_dir[i] == dir): data_to_show.append(data_size[i]) print "Total data:", sum(data_to_show) , "MB" p = charts.Histogram(data_to_show,bins=100,color='#FB9A99',title=title) charts.output_file("/tmp/"+title+".html",title=title) charts.show(p)
def plot_shuffle_size_count(csv_file,dir,title): data = read_csv(csv_file) data_size = map(lambda x: x/1024, data.SIZE.tolist()) data_dir = data.D.tolist() data_path = data.PATHNAME.tolist() length = len(data_size) data_to_show = [] for i in range(0,length): if(data_dir[i] == dir and 'shuffle' in data_path[i]): data_to_show.append(data_size[i]) print "Total shuffle data:", sum(data_to_show) , "MB" p = charts.Histogram(data_to_show,bins=100,color='#1F78B4',title=title) charts.output_file("/tmp/"+title+".html",title=title) charts.show(p)
def make_plot(which_cuisine):
    """Write and show a bar chart of the top star rating per restaurant.

    Parameters
    ----------
    which_cuisine
        Value matched against the ``category_id`` column of the business
        frame; it is also concatenated into the chart title, so it must be a
        string.

    The chart is written to ``templates/plots.html``.
    """
    # Only the business table is needed; the review table that used to be
    # loaded here was never read.
    # NOTE: read_pickle must only ever be pointed at trusted files.
    businessDf = pd.read_pickle("business2.pkl")

    p4 = Bar(
        businessDf[businessDf.category_id == which_cuisine],
        values="avg_star",
        label="name",
        agg="max",
        color="wheat",
        title="Best " + which_cuisine + " by star rating alone",
        xlabel="Restaurant name",
        ylabel="Star rating",
    )
    output_file("templates/plots.html")
    show(p4)
def epochs_perf_plot(hist):
    """
    Show training loss and accuracy per epoch as a line plot.
    input: nn history object exposing a ``history`` mapping with the keys
    'loss' and 'acc'; the number of epochs is the length of the 'acc' list.
    """
    epoch_axis = range(0, len(hist.history['acc']))

    chart = figure(title="Model Performance (Training Set)",
                   plot_width=600, plot_height=600)
    curves = (
        ('loss', "firebrick", "Loss"),
        ('acc', "navy", "Accuracy"),
    )
    for key, colour, label in curves:
        chart.line(x=epoch_axis, y=hist.history[key], color=colour,
                   line_width=4, legend=label)

    chart.legend.orientation = "bottom_left"
    chart.xaxis.axis_label = "Epoch"
    show(chart)
def output_chart(issues_df,output_mode='static'):
    """Build the stacked issues bar chart and emit it in the requested mode.

    ``output_mode`` selects the destination: ``'static'`` saves a CDN-backed
    HTML file to ``ISSUES_FILE``, ``'notebook'`` shows the chart inline, and
    any other value publishes it through the Bokeh server at
    ``BOKEH_SERVER_IP``.
    """
    import datetime
    import bokeh
    from bokeh.models import HoverTool

    # Add timestamp to title
    stamp = datetime.datetime.now().strftime('%m/%d/%Y')
    issues_chart = Bar(
        issues_df,
        label='value_delivered',
        values='status',
        agg='count',
        stack='status',
        title=ISSUES_TITLE+" (Updated "+stamp+")",
        xlabel="Value Delivered",
        ylabel="Number of Use Cases",
        legend='top_right',
        tools='hover',
        color=brewer["GnBu"][3],
    )

    # Fit the chart inside the destination frame, leaving the body margin on
    # both sides.
    margin = HTML_BODY_MARGIN * 2
    issues_chart.plot_width = DESTINATION_FRAME_WIDTH - margin
    issues_chart.plot_height = DESTINATION_FRAME_HEIGHT - margin
    issues_chart.logo = None
    issues_chart.toolbar_location = None

    hover = issues_chart.select(dict(type=HoverTool))
    hover.tooltips = [("Value Delivered", "$x")]

    # --- Configure output ---
    reset_output()

    if output_mode == 'static':
        # Static file. CDN is most space efficient
        output_file(ISSUES_FILE, title=ISSUES_TITLE,
                    autosave=False, mode='cdn',
                    root_dir=None)
        save(issues_chart, filename=ISSUES_FILE)
        return

    if output_mode == 'notebook':
        output_notebook()
    else:
        # Server (using internal server IP, rather than localhost or external)
        session = bokeh.session.Session(root_url=BOKEH_SERVER_IP,
                                        load_from_config=False)
        output_server("ddod_chart", session=session)
    show(issues_chart)
def locations_bar_chart():
    """Plot, per town, how many schools appear in the 2015 and 2016 lists.

    Schools are loaded with ``get_schools`` from the two URN lists and
    counted by town and competition year (``comp``). The town is the
    address's ``town`` entry, falling back to ``address3`` when ``town`` is
    missing. The chart is written to ``visuals/locations_count.html``.
    """
    schools_2016 = get_schools("urn/2016.urns.list", 2016)
    schools_2015 = get_schools("urn/2015.urns.list", 2015)

    # locations[town] -> {2015: count, 2016: count}
    locations = {}

    def _update_locations(schools_dict):
        for school in schools_dict:
            addr = school.data["address"]
            try:
                town = addr["town"]
            except KeyError:
                town = addr["address3"]
            year = school.data["comp"]
            counts = locations.setdefault(town, {2015: 0, 2016: 0})
            counts[year] += 1

    _update_locations(schools_2015)
    _update_locations(schools_2016)

    # Snapshot once so categories and both series share the same ordering.
    pairs = list(locations.iteritems())
    x_categories = [town for town, _ in pairs]
    y_data = {
        "SR2015": [counts[2015] for _, counts in pairs],
        "SR2016": [counts[2016] for _, counts in pairs],
    }

    output_file("visuals/locations_count.html")
    bar = Bar(
        y_data,
        cat=x_categories,
        title="School count and locations",
        xlabel="Locations",
        ylabel="Count",
        width=1000,
        height=600,
        legend=True,
    )
    show(bar)
def graph_feature_importances(model, feature_names, autoscale=True, headroom=0.05, width=10, summarized_columns=None):
    '''
    Author: Mike Bernico
    Purpose: Graph the feature importances of a fitted ensemble as a bar
    chart, write it to featureImportance.html and print the importances.

    Parameters
    ----------
    model = Fitted ensemble exposing ``feature_importances_``.
    feature_names = A list of the names of those features, in the same order
        as ``model.feature_importances_``.
    autoscale, headroom, width = Accepted for backward compatibility; they
        currently have no effect on the chart.
    summarized_columns = A list of column prefixes to summarize on, for dummy
        variables (e.g. ["day_"] sums every feature whose name contains
        "day_" into a single "day_" entry).
    '''
    feature_dict = dict(zip(feature_names, model.feature_importances_))

    if summarized_columns:
        # Some dummy columns are to be summarized.
        for col_name in summarized_columns:
            # Collect the matching keys first so the dict is not resized
            # while it is being iterated.
            matching = [name for name in feature_dict if col_name in name]
            # Sum the matching importances while removing their keys ...
            sum_value = sum(feature_dict.pop(name) for name in matching)
            # ... and re-add the total under the summarized name.
            feature_dict[col_name] = sum_value

    # list() keeps this working where dict views are not sequences.
    results = pd.Series(list(feature_dict.values()),
                        index=list(feature_dict.keys()))
    p = Bar(results, ylabel="Feature Importance", legend=None, logo=None)
    output_file("featureImportance.html")
    show(p)

    # Single-argument form prints identically on Python 2 and Python 3.
    print(feature_dict)
    # The former trailing pd.DataFrame(feature_dict) was removed: building a
    # frame from a dict of scalars without an index raises ValueError, and
    # its result was never used.
def plot_addr_count(direction, title):
    """Show a 100-bin histogram of block addresses for one direction.

    Parameters
    ----------
    direction
        Value matched against the ``D`` column of ``read_csv()`` (the
        original handled 'R' and 'W'; any value is now accepted and simply
        selects the matching rows).
    title : str
        Chart title; also names the output file ``/tmp/<title>.html``.
    """
    data = read_csv()
    data_addr = data.BLOCK.tolist()
    data_dir = data.D.tolist()

    # One filter replaces the duplicated 'R' / 'W' branches and can no longer
    # leave the address list unbound for other directions.
    dic_out = [
        addr for row_dir, addr in zip(data_dir, data_addr)
        if row_dir == direction
    ]

    p = charts.Histogram(dic_out, bins=100, color='#FB9A99', title=title)
    charts.output_file("/tmp/%s.html" % title)
    charts.show(p)
def get_trains_week(stat_code):
    """Show a histogram of one station's train times over a background image.

    The rows of the module-level ``dis_trains`` whose ``code`` equals
    ``stat_code`` are selected, and the first entry of their ``times`` column
    is binned into 120-wide bins over 0..10080 (presumably minutes of a week
    in two-hour bins -- confirm). The bars are drawn over an image derived
    from the module-level ``gradient``.

    Returns
    -------
    (hist, edges)
        The counts and bin edges returned by ``np.histogram``.
    """
    sbn.set_style("white")

    stat_vals = dis_trains[dis_trains.code == stat_code]
    xx = stat_vals.times.values

    hist, edges = np.histogram(xx[0], bins=range(0, 10081, 120))
    # One unit of headroom above the tallest bar, shared by axis and image.
    top = max(hist) + 1

    fig = figure(x_range=(0, 10080), y_range=(0, top))
    d = np.sin(3*gradient)
    fig.image(image=[d], x=0, y=0, dw=10080, dh=top)
    fig.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
             fill_color="#036564", line_color="#033649")
    # An empty tick list removes the ticks from the x axis.
    fig.xaxis[0].ticker = FixedTicker(ticks=[])
    fig.xaxis.major_label_orientation = "vertical"

    output_file("test_bg_image.html", title="Background image")
    show(fig)
    return hist, edges
def bar_response(results_list, output_path):
    """Show one stacked bar chart per frame in ``results_list``.

    Charts are written under ``<output_path>/charts`` (created when missing).
    Every chart targets the same file, ``test bar.html``, so each iteration
    overwrites the previous output. Each frame is printed before plotting.
    """
    output_dir = os.path.join(output_path, "charts")
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    output_file_path = os.path.join(output_dir, 'test bar.html')
    tools = "pan,wheel_zoom,box_zoom,reset,hover,save"

    for df in results_list:
        print(df)

        p = Bar(df, label='hh_type', values='perc_res', stack='digital',
                title="a_title", legend='top_right', tools=tools)

        # Configure the hover tool created through the "hover" tools entry.
        hover = p.select_one(HoverTool)
        hover.point_policy = "follow_mouse"
        hover.tooltips = [("count", "@height")]

        output_file(output_file_path)
        show(p)
# Bar chart of the hashtag counts held in `data` (built earlier in the
# script); sort=False keeps the categories in their given order.
bar = Bar(data, values='data',\
          label=cat(columns='x', sort=False),\
          title="Top Hashtags Used By Trump", \
          legend = False, xlabel="Hashtags", ylabel="Number of Occurance")

# Split the (mention, count) pairs into parallel label / frequency tuples.
labels_2, freq_2 = zip(*trump_most_common_mentions)
data_2 = {'data_2': freq_2, 'x_2': labels_2}
bar_2 = Bar(data_2, values='data_2',\
            label=cat(columns='x_2', sort=False),\
            title="Top User Mentions By Trump", \
            legend = False, xlabel="User Mentions", ylabel="Number of Occurance")

# Both charts side by side in a single HTML page.
output_file("trump_top_mentions.png.html")
show(row(bar, bar_2))

#%%
# Let's Geo Track Trump
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap

# Coordinates: the `place` values of the realDonaldTrump tweets, expanded
# into separate Long / Lat columns.
coord_frame = pd.DataFrame(
    tweet_df[tweet_df.handle == 'realDonaldTrump']['place'])
coord_frame[['Long', 'Lat']] = coord_frame['place'].apply(pd.Series)

# Longitude / latitude bounds for the map.
lon_min, lon_max = -130, -55
lat_min, lat_max = 20, 50
plt.figure(2, figsize=(12, 6))
from bokeh.charts import Histogram, output_file, show
from bokeh.sampledata.autompg import autompg as df

# Sort the sample data by cylinder count in place before plotting.
df.sort_values(by='cyl', inplace=True)

# Horsepower histogram with one colour per cylinder count.
hist = Histogram(df, values='hp', color='cyl',
                 title="HP Distribution by Cylinder Count", legend='top_right')

output_file("histogram_single.html", title="histogram_single.py example")
show(hist)
# Reshape df2 to long form: one row per (abbr, name, variable) with its value.
df2 = pd.melt(df2, id_vars=['abbr', 'name'])

# Melted values against the name column, coloured by the melted variable.
scatter5 = Scatter(df2, x='value', y='name', color='variable',
                   title="x='value', y='name', color='variable'",
                   xlabel="Medals", ylabel="Top 10 Countries",
                   legend='bottom_right')

# blend() stacks the petal and sepal columns into one combined
# length / width column per axis.
scatter6 = Scatter(
    flowers, x=blend('petal_length', 'sepal_length', name='length'),
    y=blend('petal_width', 'sepal_width', name='width'), color='species',
    title=
    'x=petal_length+sepal_length, y=petal_width+sepal_width, color=species',
    legend='top_right')

output_file("scatter_multi.html", title="scatter_multi.py example")

# scatter0 and scatter2..scatter4 are created earlier in the script;
# the six charts are laid out two per row.
show(
    gridplot(scatter0, scatter2, scatter3, scatter4, scatter5, scatter6,
             ncols=2))
# RowPlot-Histogram from bokeh.charts import Histogram, output_file, show from bokeh.layouts import row from bokeh.sampledata.autompg import autompg as df ################################################## hist = Histogram(df, values='mpg', title="Auto MPG Histogram", plot_width=400) # Compare Both Data Specifications hist2 = Histogram(df, values='mpg', label='cyl', color='cyl', legend='top_right', title="MPG Histogram by Cylinder Count", plot_width=400) ################################################# output_file('hist.html') show(row(hist, hist2))
elif (userFlag == '5'): if (dataOptions == 2): bubbleSortA(diseaseCounter, singleDiseaseList) ascendingData = { 'Disease Name': singleDiseaseList, '# of Occurances': diseaseCounter } asc = Bar(ascendingData, values='# of Occurances', label=CatAttr(columns=['Disease Name'], sort=False), title='U.S. Chronic Diseases(2007-2013)', color=userColor) output_file("bar2.html") show(asc) elif (dataOptions == 3): bubbleSortD(diseaseCounter, singleDiseaseList) descendingData = { 'Disease Name': singleDiseaseList, '# of Occurances': diseaseCounter } des = Bar(descendingData, values='# of Occurances', label=CatAttr(columns=['Disease Name'], sort=False), title='U.S. Chronic Diseases(2007-2013)', color=userColor) output_file("bar3.html") show(des)
dash=['IBM', 'MSFT', 'AAPL'], title="Timeseries (Line Explicit)", tools=TOOLS, ylabel='Stock Prices') # step tsstep = TimeSeries(data, x='Date', y=['IBM', 'MSFT', 'AAPL'], legend=True, builder_type='step', title="Timeseries (Step)", tools=TOOLS, ylabel='Stock Prices') # point tspoint = TimeSeries(data, x='Date', y=['IBM', 'MSFT', 'AAPL'], legend=True, builder_type='point', marker=['IBM', 'MSFT', 'AAPL'], color=['IBM', 'MSFT', 'AAPL'], title="Timeseries (Point)", tools=TOOLS, ylabel='Stock Prices') output_file("timeseries.html", title="timeseries.py example") show(vplot(tsline, tsline2, tsstep, tspoint))
# # Using Bokeh # In[44]: from bokeh.io import output_notebook output_notebook() # In[45]: from bokeh.charts import Histogram, output_file, show # create a new plot with a title and axis labels p1 = Histogram(samp['SCORE']) # output_file("histogram.html") show(p1) # In[46]: from bokeh.charts import Histogram, output_file, show from bokeh.sampledata.autompg import autompg as df p2 = Histogram(mRests, 'SCORE', color='GRADE', title="Score Grouped by Grade", bins=15, legend='top_right') # output_file("histogram_color.html")
stat='mean', legend='top_right') hm9 = HeatMap(fruits, y='year', x='fruit', values='fruit_count', stat=None) hm10 = HeatMap(unempl, x='Year', y='Month', values='Unemployment', stat=None, sort_dim={'x': False}, width=900, plot_height=500) output_file("hm11.html", title="Bokeh heatmap example (hm11.py)") show( column( gridplot(hm1, hm2, hm3, hm4, hm5, hm6, hm7, hm8, hm9, ncols=1, plot_width=800, plot_height=800), hm10))
class Dado:
    """A set of identical dice that are rolled together."""

    def __init__(self, numero_de_dados, numero_de_lados):
        # Number of dice in the set and number of faces on each die.
        self.numero_de_dados = numero_de_dados
        self.numero_de_lados = numero_de_lados

    def tirar_dados(self):
        """Roll every die once and return the list of faces obtained."""
        return [
            random.randint(1, self.numero_de_lados)
            for _ in range(self.numero_de_dados)
        ]


if __name__ == "__main__":
    dado_6 = Dado(2, 6)

    # Sum of the two dice for each of the 10000 rolls.
    tiradas = 10000
    sumas = [sum(dado_6.tirar_dados()) for _ in range(tiradas)]
    print(len(sumas))

    grafica = Histogram(sumas, title='Suma de dos dados con 10000 realizaciones')
    output_file('suma_de_dados.html')
    show(grafica)
width=800) output_file('ECG_DataQuality_of_' + str(ids[i]) + ".html", title="ECG_BAR_Plot") source = ColumnDataSource(data) columns = [ TableColumn(field="Date", title="Date"), TableColumn(field="ACCEPTABLE", title="ACCEPTABLE(Hours)"), TableColumn(field="UNACCEPTABLE", title="UNACCEPTABLE(Hours)"), TableColumn(field="Total", title="Total(Hours)") ] data_table = DataTable(source=source, columns=columns, width=500, height=400) show(hplot(bar, vform(data_table))) dataf['ACCEPTABLE'] = dataf['ACCEPTABLE'] + data['ACCEPTABLE'] dataf['UNACCEPTABLE'] = dataf['UNACCEPTABLE'] + data['UNACCEPTABLE'] dataf['Date'] = dataf['Date'] + data['Date'] dataf['Total'] = dataf['Total'] + data['Total'] dataf['ParticipantID'] = dataf['ParticipantID'] + [ ids[i] for f in range(len(data['Total'])) ] df = pd.DataFrame(dataf) df.to_excel('ECG.xlsx')
import pandas as pd
from bokeh.charts import Horizon, output_file, show


def _load_quotes(url):
    """Read one quote table from *url*, parsing its Date column as dates."""
    return pd.read_csv(url, parse_dates=['Date'])


# read in some stock data from the Yahoo Finance API
AAPL = _load_quotes(
    "http://ichart.yahoo.com/table.csv?s=AAPL&a=0&b=1&c=2000&d=0&e=1&f=2010")
MSFT = _load_quotes(
    "http://ichart.yahoo.com/table.csv?s=MSFT&a=0&b=1&c=2000&d=0&e=1&f=2010")
IBM = _load_quotes(
    "http://ichart.yahoo.com/table.csv?s=IBM&a=0&b=1&c=2000&d=0&e=1&f=2010")

# One adjusted-close column per ticker; the Date column is taken from AAPL.
data = {
    'AAPL': AAPL['Adj Close'],
    'Date': AAPL['Date'],
    'MSFT': MSFT['Adj Close'],
    'IBM': IBM['Adj Close'],
}

hp = Horizon(
    data,
    x='Date',
    plot_width=800,
    plot_height=300,
    title="horizon plot using stock inputs",
)

output_file("horizon.html")
show(hp)
# -*- coding:utf-8 -*-
__author__ = 'chunhui.zhang'

from bokeh.charts import Line, show, output_file

# Dataset in which several columns measure the same thing, plus a
# non-numeric 'test' column alternating between 'foo' and 'bar'.
data = {
    'python': [2, 3, 7, 5, 26, 221, 44, 233, 254, 265, 266, 267, 120, 111],
    'pypy': [12, 33, 47, 15, 126, 121, 144, 233, 254, 225, 226, 267, 110, 130],
    'jython': [22, 43, 10, 25, 26, 101, 114, 203, 194, 215, 201, 227, 139, 160],
    'test': ['foo', 'bar'] * 7,
}

# The measure columns; each one gets its own color and dash style.
measures = ('python', 'pypy', 'jython')

line = Line(
    data,
    y=list(measures),
    dash=list(measures),
    color=list(measures),
    legend_sort_field='color',
    legend_sort_direction='ascending',
    title="Interpreter Sample Data",
    ylabel='Duration',
    legend=True,
)

output_file("line_single.html", title="line_single.py example")
show(line)
# Ticker columns plotted by every chart below.
_SYMBOLS = ('IBM', 'MSFT', 'AAPL')


def _stock_chart(title, **extra):
    """Build a TimeSeries of the three tickers with the shared axis options.

    *extra* carries the per-chart keyword arguments (builder type, color,
    dash or marker specifications).
    """
    return TimeSeries(
        data,
        y=list(_SYMBOLS),
        legend=True,
        title=title,
        tools=TOOLS,
        ylabel='Stock Prices',
        xlabel='Date',
        **extra
    )


# line simple
tsline = _stock_chart("Timeseries (Line)")

# line explicit
tsline2 = _stock_chart(
    "Timeseries (Line Explicit)",
    color=list(_SYMBOLS),
    dash=list(_SYMBOLS),
)

# step
tsstep = _stock_chart("Timeseries (Step)", builder_type='step')

# point
tspoint = _stock_chart(
    "Timeseries (Point)",
    builder_type='point',
    marker=list(_SYMBOLS),
    color=list(_SYMBOLS),
)

output_file("timeseries.html", title="timeseries.py example")

layout = column(tsline, tsline2, tsstep, tspoint)
show(layout)
from bokeh.charts import Dot, show, output_file

# Table-like input: one (interpreter, sample, timing) record per row.
measurements = [
    ('python', '1st', -2),
    ('python', '2nd', 5),
    ('pypy', '1st', 12),
    ('pypy', '2nd', 40),
    ('jython', '1st', 22),
    ('jython', '2nd', 30),
]
interpreters, samples, timings = zip(*measurements)

data = {
    'sample': list(samples),
    'interpreter': list(interpreters),
    'timing': list(timings),
}

# Labels come from the interpreter column; rows are grouped by the sample column.
dot_options = dict(
    values='timing',
    label='interpreter',
    group='sample',
    agg='mean',
    title="Python Interpreter Sampling",
    legend='top_right',
    width=600,
)
dots = Dot(data, **dot_options)

output_file("dots.html")
show(dots)
def process_stats(self, input_path_file, output_path_file_csv, output_path_file_html):
    """Export read-processing statistics as a CSV file and two bar charts.

    The JSON file at ``input_path_file`` is read with one row per index
    entry and written tab-separated to ``output_path_file_csv`` + '.csv'.
    Two stacked bar charts (read types per sample, and read-length
    frequencies) are then written to ``output_path_file_html`` + '.html'
    and shown.
    """
    df = pd.read_json(input_path_file, orient='index')
    df.to_csv(output_path_file_csv + '.csv', index=True, sep='\t')

    def _chart_tools(x_caption, y_caption):
        # Each chart gets its own tool instances; only the hover captions differ.
        return [
            HoverTool(tooltips=[(x_caption, "@x"), (y_caption, "@y")]),
            PanTool(),
            BoxSelectTool(),
            BoxZoomTool(),
            WheelZoomTool(),
            ResizeTool(),
            ResetTool(),
        ]

    # Chart 1: read processing (poly/single A removed or unmodified).
    # Every row of the frame contributes three entries, one per condition.
    conditions = []
    samples1 = []
    read_nr = []
    for index, row in df.iterrows():
        conditions += ['poly(A) removed', 'single(A) removed', 'unmodified']
        samples1 += [index] * 3
        read_nr += [
            row['polya_removed'],
            row['single_a_removed'],
            row['unmodified'],
        ]

    data1 = {
        'condition': conditions,
        'sample': samples1,
        'Nr. of reads': read_nr,
    }
    bar1 = Bar(
        data1,
        values='Nr. of reads',
        label='sample',
        stack='condition',
        agg='sum',
        title="Input read types",
        legend='top_right',
        palette=['darkseagreen', 'salmon', 'darkslateblue'],
        tools=_chart_tools("Sample", "Nr of reads"),
    )

    # Chart 2: read length after processing and its frequency.
    samples2 = []
    read_length = []
    frequency = []
    for index, row in df.iterrows():
        length_freq = row['read_length_after_processing_and_freq']
        for key, value in length_freq.items():
            repeats = int(self.nr_items_pro(key))
            samples2.extend([index] * repeats)
            read_length.append(float(key))
            frequency.append(value)

    data2 = {
        'samples': samples2,
        'read length': read_length,
        'frequency': frequency,
    }
    bar2 = Bar(
        data2,
        values='frequency',
        label='read length',
        stack='samples',
        agg='sum',
        title="Input read length and frequency",
        legend='top_left',
        palette=['darkseagreen', 'salmon', 'darkslateblue', 'olive'],
        width=1200,
        bar_width=1.0,
        tools=_chart_tools("Read length", "Frequency"),
    )

    bar = column(bar1, bar2)
    output_file(output_path_file_html + '.html')
    show(bar)
line2.title_text_font_size = '11pt'

# The three measured series, used as y / dash / color specifications.
_INTERPRETERS = ('python', 'pypy', 'jython')

# Dash and color both vary per series.
line3_title = (
    "Interpreters (x='date', y, dash, color=['python', 'pypy', 'jython'])"
)
line3 = Line(
    df,
    x='date',
    y=list(_INTERPRETERS),
    dash=list(_INTERPRETERS),
    color=list(_INTERPRETERS),
    title=line3_title,
    ylabel='Duration',
    legend=True,
)
line3.title_text_font_size = '11pt'

# Color varies per series, dash follows the 'test' column; tooltips enabled.
line4_title = (
    "Interpreters (x='date', y, color=['python', 'pypy', 'jython'], dash='test') with tooltips"
)
line4 = Line(
    df,
    x='date',
    y=list(_INTERPRETERS),
    dash='test',
    color=list(_INTERPRETERS),
    title=line4_title,
    ylabel='Duration',
    legend=True,
    tooltips=[('series', '@series'), ('test', '@test')],
)

output_file("line_multi.html", title="line examples")

# One hplot per row, stacked vertically.
rows = [
    hplot(line),
    hplot(line0, line1),
    hplot(line2, line3),
    hplot(line4),
]
show(vplot(*rows))
# create a DataFrame with the sample data
df = pd.io.json.json_normalize(data['data'])

# filter by countries with at least one medal and sort by total, descending
df = df[df['medals.total'] > 0]
# DataFrame.sort() no longer exists in pandas; sort_values() is the
# supported call and takes the same column / ascending arguments.
df = df.sort_values(by="medals.total", ascending=False)

# get the countries and group the data by medal type
countries = df.abbr.values.tolist()
gold = df['medals.gold'].astype(float).values
silver = df['medals.silver'].astype(float).values
bronze = df['medals.bronze'].astype(float).values

# build a dict containing the grouped data
medals = OrderedDict(bronze=bronze, silver=silver, gold=gold)

# any of the following commented are valid BoxPlot inputs
#medals = pd.DataFrame(medals)
#medals = list(medals.values())
#medals = tuple(medals.values())
#medals = np.array(list(medals.values()))

output_file("boxplot.html")

boxplot = BoxPlot(
    medals,
    marker='circle',
    outliers=True,
    title="boxplot test",
    xlabel="medal type",
    ylabel="medal count",
    width=800,
    height=600,
)

show(boxplot)
from bokeh.charts import Scatter, output_file, show
from bokeh.sampledata.autompg import autompg as df

# 'hp' plotted against 'mpg', with marker color taken from the 'cyl' column.
scatter_options = dict(
    x='mpg',
    y='hp',
    color='cyl',
    title="HP vs MPG (shaded by CYL)",
    xlabel="Miles Per Gallon",
    ylabel="Horsepower",
)
p = Scatter(df, **scatter_options)

output_file("scatter.html")
show(p)
def handle(self, *args, **options):
    """Score the sentiment of every review in the CSV file and chart it.

    Reads the product-review CSV, computes the TextBlob polarity of each
    review text, stores one ``Sentiment`` row per review plus one
    ``SentimentCount`` and one ``SentimentPercentage`` row, and finally
    writes a bar chart of the bucket sizes to ``bar.html`` and shows it.
    """
    # Initialize all variables
    file_input = '/Users/Jason Plagens/Documents/Spring Semester 2016/ISA 406/Project/Django Project/isa406/sawa/data/Product Reviews - PowerBar Energy Blends.csv'

    # Initialize the sentiment factor lists
    terrible_list = []
    bad_list = []
    neutral_list = []
    good_list = []
    excellent_list = []

    # Initialize empty lists for review information
    activity_text_list = []
    created_date_list = []
    rating_list = []
    activity_name_list = []
    title_list = []

    # Initialize empty lists for user who posted review information
    username_list = []
    age_list = []
    gender_list = []
    city_list = []

    # Initialize empty lists for media reach of review information
    facebook_list = []
    twitter_list = []
    offline_list = []

    with open(file_input, encoding='utf-8') as csvfile:
        # A csv.reader can be consumed only once: a second ``for`` over the
        # same reader yields nothing. Fill every per-row list in one pass.
        for row in csv.reader(csvfile, delimiter=','):
            # Review data
            activity_text_list.append(row[9])
            created_date_list.append(row[1])
            rating_list.append(row[6])
            activity_name_list.append(row[7])
            title_list.append(row[8])

            # User who posted the review
            username_list.append(row[0])
            age_list.append(row[2])
            gender_list.append(row[3])
            city_list.append(row[4])

            # Media reach of the review
            # NOTE(review): these indices (0, 2, 3) repeat the user columns
            # above and look like a copy-paste slip - confirm the real
            # column positions against the CSV layout.
            facebook_list.append(row[0])
            twitter_list.append(row[2])
            offline_list.append(row[3])

    # Print the total number of reviews that are in the file
    total_reviews = len(activity_text_list)
    print("Total number of reviews = ", total_reviews)

    # Bucket each review by polarity, keeping the value in the in-memory
    # list and saving it to the db in the matching Sentiment field.
    for list_item in activity_text_list:
        review_polarity = TextBlob(list_item).sentiment.polarity

        if -1 <= review_polarity < -0.5:
            terrible_list.append(review_polarity)
            Sentiment(terrible_sentiment=review_polarity).save()
        elif -0.5 <= review_polarity < 0:
            bad_list.append(review_polarity)
            Sentiment(bad_sentiment=review_polarity).save()
        elif review_polarity == 0:
            neutral_list.append(review_polarity)
            Sentiment(neutral_sentiment=review_polarity).save()
        elif 0 < review_polarity <= 0.5:
            good_list.append(review_polarity)
            Sentiment(good_sentiment=review_polarity).save()
        elif 0.5 < review_polarity <= 1:
            excellent_list.append(review_polarity)
            Sentiment(excellent_sentiment=review_polarity).save()

    # Create object for count of sentiment and save to db
    ct_object = SentimentCount(ct_terrible=len(terrible_list),
                               ct_bad=len(bad_list),
                               ct_neutral=len(neutral_list),
                               ct_good=len(good_list),
                               ct_excellent=len(excellent_list))
    ct_object.save()

    def _share(bucket):
        # Fraction of all reviews in this bucket; 0.0 for an empty file
        # instead of a ZeroDivisionError.
        return len(bucket) / total_reviews if total_reviews else 0.0

    # Create object for percentage of sentiment and save to db
    pt_object = SentimentPercentage(pt_terrible=_share(terrible_list),
                                    pt_bad=_share(bad_list),
                                    pt_neutral=_share(neutral_list),
                                    pt_good=_share(good_list),
                                    pt_excellent=_share(excellent_list))
    pt_object.save()

    data = {
        'sentiment factor':
        ['Terrible', 'Bad', 'Neutral', 'Good', 'Excellent'],
        'sentiment count': [
            len(terrible_list),
            len(bad_list),
            len(neutral_list),
            len(good_list),
            len(excellent_list)
        ]
    }

    p = Bar(data,
            values='sentiment count',
            label='sentiment factor',
            title="Sentiment of Reviews",
            legend='top_right',
            width=400)

    output_file("bar.html")
    show(p)
from bokeh.charts import BoxPlot, output_file, show
from bokeh.sampledata.autompg import autompg as df

# origin = the source of the data that makes up the autompg dataset
title = "MPG by Cylinders and Data Source, Colored by Cylinders"

# Label by two dimensions and color by one of them, so boxes that share
# a 'cyl' value are visually grouped together.
label_columns = ['cyl', 'origin']
box_plot = BoxPlot(
    df,
    values='mpg',
    label=label_columns,
    color='cyl',
    title=title,
)

output_file("boxplot_single.html")
show(box_plot)
title="label=['cyl', 'origin'] color='cyl'", color='cyl') # specify custom marker for outliers box_plot6 = BoxPlot(df, label='cyl', values='mpg', marker='cross', title="label='cyl', values='mpg', marker='cross'") # color whisker by cylinder box_plot7 = BoxPlot(df, label='cyl', values='mpg', whisker_color='cyl', title="label='cyl', values='mpg', whisker_color='cyl'") # remove outliers box_plot8 = BoxPlot(df, label='cyl', values='mpg', outliers=False, title="label='cyl', values='mpg', outliers=False") # collect and display output_file("boxplot.html") show( vplot(hplot(box_plot, box_plot2, box_plot3), hplot(box_plot4, box_plot5, box_plot6), hplot(box_plot7, box_plot8)))
# Example with nested json/dict like data, which has been pre-aggregated and pivoted
df2 = df_from_json(data)
# DataFrame.sort() no longer exists in pandas; sort_values() is the
# supported call and takes the same column / ascending arguments.
df2 = df2.sort_values(by='total', ascending=False)

# Keep the ten rows with the highest totals, then reshape to long form
# (one row per country and remaining column).
df2 = df2.head(10)
df2 = pd.melt(df2, id_vars=['abbr', 'name'])

scatter5 = Scatter(df2,
                   x='value',
                   y='name',
                   color='variable',
                   title="x='value', y='name', color='variable'",
                   xlabel="Medals",
                   ylabel="Top 10 Countries",
                   legend='bottom_right')

# Blend the petal and sepal columns into single 'length' / 'width' dimensions.
scatter6 = Scatter(
    flowers,
    x=blend('petal_length', 'sepal_length', name='length'),
    y=blend('petal_width', 'sepal_width', name='width'),
    color='species',
    title='x=petal_length+sepal_length, y=petal_width+sepal_width, color=species',
    legend='top_right')

output_file("scatter_multi.html")

show(
    vplot(hplot(scatter0, scatter1), hplot(scatter2, scatter3),
          hplot(scatter4, scatter5), hplot(scatter6)))
def _mean_dot(frame, title, **spec):
    """Build a Dot chart of *frame* aggregated with 'mean'.

    *spec* carries the remaining keyword arguments of the chart (label,
    values, grouping, color and legend settings).
    """
    return Dot(frame, agg='mean', title=title, **spec)


# multiple columns
dot_plot5 = _mean_dot(
    df,
    "label=['cyl', 'origin'] values='mpg' agg='mean'",
    label=['cyl', 'origin'],
    values='mpg',
)

dot_plot6 = _mean_dot(
    df,
    "label='origin' values='mpg' agg='mean' stack='cyl'",
    label='origin',
    values='mpg',
    stack='cyl',
    legend='top_right',
)

dot_plot7 = _mean_dot(
    df,
    "label='cyl' values='displ' agg='mean' group='origin'",
    label='cyl',
    values='displ',
    group='origin',
    legend='top_right',
)

dot_plot8 = _mean_dot(
    df,
    "label='cyl' values='neg_mpg' agg='mean' group='origin'",
    label='cyl',
    values='neg_mpg',
    group='origin',
    color='origin',
    legend='top_right',
)

# infer labels from index
df = df.set_index('cyl')
dot_plot9 = _mean_dot(df, 'inferred labels', values='mpg', legend='top_right')

# collect and display
output_file("dots_multi.html")

top_row = hplot(dot_plot, dot_plot2, dot_plot3)
middle_row = hplot(dot_plot4, dot_plot5, dot_plot6)
bottom_row = hplot(dot_plot7, dot_plot8, dot_plot9)
show(vplot(top_row, middle_row, bottom_row))
pullList.append(str(pull) + detColorNonFocus(pull)) for pull in pullList: # For each individual in a set of 5 if pull[1] in pullColorStillNeed: # if its still a color i need pullNumber = pullNumber + 1 # I pull it from the set totalPulls = totalPulls + 1 orbCount = orbCount + orbsForPull( pullNumber) # purchasing and adding to orbs if pull[1:] in sampleStillWant: # if the pull is a focus sampleStillWant.remove(pull[1:]) # remove that pullColorStillNeed.remove(pull[1]) if int(pull[0]) >= 5: totalPulls = 0 otherPulls[pull[0]] = otherPulls[pull[0]] + 1 if len(sampleStillWant) == 0: gotAllRare = True orbsUsed.append(orbCount) print("Average orbs required for pulling", str(len(wantFocus)) + " focus heroes:", str(np.mean(orbsUsed))) print("You got an average of:\n", otherPulls['3'] / sampleSize, "3 stars\n", otherPulls['4'] / sampleSize, "4 stars\n", otherPulls['5'] / sampleSize, "5 stars\n", otherPulls['6'] / sampleSize - len(wantFocus), "other focus characters") print("for an average of", sum(otherPulls.values()) / sampleSize, "new characters at a price of", round(sum(orbsUsed) / sum(otherPulls.values()), 3), 'orbs per character') show(Histogram(orbsUsed, plot_width=1800, plot_height=900))
# OrderedDict is used below to build ``medals`` and must be imported here.
from collections import OrderedDict

import pandas as pd
from bokeh.charts import Bar, output_file, show
from bokeh.sampledata.olympics2014 import data

df = pd.io.json.json_normalize(data['data'])

# filter by countries with at least one medal and sort by total, descending
df = df[df['medals.total'] > 0]
# DataFrame.sort() no longer exists in pandas; sort_values() is the
# supported call and takes the same column / ascending arguments.
df = df.sort_values(by="medals.total", ascending=False)

# get the countries and group the data by medal type
countries = df.abbr.values.tolist()
gold = df['medals.gold'].astype(float).values
silver = df['medals.silver'].astype(float).values
bronze = df['medals.bronze'].astype(float).values

# build a dict containing the grouped data
medals = OrderedDict(bronze=bronze, silver=silver, gold=gold)

# any of the following commented are also valid Bar inputs
#medals = pd.DataFrame(medals)
#medals = list(medals.values())

output_file("stacked_bar.html")

bar = Bar(medals, countries, title="Stacked bars", stacked=True)

show(bar)
from collections import OrderedDict

from bokeh.charts import Line, show, output_file

# One list of measurements per interpreter.
xyvalues = OrderedDict(
    python=[2, 3, 7, 5, 26, 221, 44, 233, 254, 265, 266, 267, 120, 111],
    pypy=[12, 33, 47, 15, 126, 121, 144, 233, 254, 225, 226, 267, 110, 130],
    jython=[22, 43, 10, 25, 26, 101, 114, 203, 194, 215, 201, 227, 139, 160],
)

# The original example lists these alternatives as equally valid Line inputs:
#xyvalues = pd.DataFrame(xyvalues)
#xyvalues = xyvalues.values()
#xyvalues = np.array(xyvalues.values())

output_file("lines.html", title="line.py example")

# One line per key of xyvalues, with a legend.
line_options = dict(title="Lines", ylabel='measures', legend=True)
chart = Line(xyvalues, **line_options)

show(chart)
from bokeh.charts import HeatMap, bins, output_file, show
import pandas as pd

DATA_FILE = '../../samples/GSM188012.CEL'

# Column types enforced while parsing the tab-separated file.
dtype = dict(x=int, y=int, lux=float)
dataset = pd.read_csv(DATA_FILE, sep='\t', dtype=dtype)

# Bin both coordinates and color each cell by the mean 'lux' inside it.
x_bins = bins('x')
y_bins = bins('y')
hm = HeatMap(
    dataset,
    x=x_bins,
    y=y_bins,
    values='lux',
    title='Expression',
    stat='mean',
)

output_file("heatmap7.html", title="heatmap.py example")
show(hm)
workit.signalToBackground() ] csvwriter.writerow(stats) calc_inhibitions = workit.percentInhibition() list_inhibitions.append(calc_inhibitions) all_inhibitions = concat(list_inhibitions) all_inhibitions.to_csv(inhibitions_output + ' ' + project_code + ' ' + project_date + '.csv', index=False) # Generate HeatMap visualizations using Bokeh library for each plate in percent inhibitions Dataframe. output_file(viz_output + ' ' + project_code + ' ' + project_date + '.html') graphs = [] for plate2 in barcodes: aplate = all_inhibitions[(all_inhibitions['Barcode'] == plate2)] hm = HeatMap(aplate, x='Column', y='Reverse Row', values='Percent Inhibition', palette=RdYlBu3, title=plate2, stat=None, hover_tool=True) graphs.append(hm) arranged_graphs = tuplize(graphs) visualization = gridplot(arranged_graphs) show(visualization)
bar_width=0.7, title="(v2) Valence with MSE") p_aro_r2 = Bar(result_ridge_aro, values='r2', legend=None, color='orange', bar_width=0.7, title="(a1) Arousal with R2 score") p_aro_mse = Bar(result_ridge_aro, values='mse', legend=None, color='orange', bar_width=0.7, title="(a2) Arousal with MSE") show( gridplot([[p_val_r2, p_aro_r2], [p_val_mse, p_aro_mse]], plot_width=450, plot_height=400)) # In[27]: result_ridge_val.join(result_ridge_aro, lsuffix='_val', rsuffix='_aro') # ## SVR # In[8]: get_ipython().magic(u'timeit') val_svr['r2']['all'], val_svr['mse']['all'] = test_regr( GridSearchCV(SVR(kernel='rbf', gamma=0.1), cv=5, n_jobs=-1,
from bokeh.charts import Donut, show, output_file
from bokeh.charts.utils import df_from_json
from bokeh.sampledata.olympics2014 import data

import pandas as pd

# Convert the json/dict sample data into a DataFrame.
df = df_from_json(data)

# Keep only rows whose 'total' exceeds 8, largest totals first.
above_threshold = df[df['total'] > 8]
ranked = above_threshold.sort_values(by="total", ascending=False)

# Long form: one row per (abbr, medal) pair with its count.
df = pd.melt(
    ranked,
    id_vars=['abbr'],
    value_vars=['bronze', 'silver', 'gold'],
    value_name='medal_count',
    var_name='medal',
)

# original example
donut_options = dict(
    label=['abbr', 'medal'],
    values='medal_count',
    text_font_size='8pt',
    hover_text='medal_count',
)
d = Donut(df, **donut_options)

output_file("donut.html", title="donut.py example")
show(d)