def add_categorical_colormap(self, palette, categorical_name, **kwargs):
    """
    Create a Categorical Colormap

    Parameters
    ----------
    palette: str or Tuple
        The color palette of the colormap. It can either be one of Bokeh's
        default palettes or a Tuple of colors in hexadecimal format.
    categorical_name: str
        The column name of the loaded dataset that contains the categorical
        values
    **kwargs
        Forwarded unchanged to ``bokeh_mdl.CategoricalColorMapper``.

    Returns
    -------
    cmap: Dict
        ``{'field': categorical_name, 'transform': <mapper>}``. The same
        dict is also stored on ``self.cmap``.

    Raises
    ------
    ValueError
        If ``palette`` is neither a tuple nor one of
        ``ALLOWED_CATEGORICAL_COLOR_PALLETES``.
    """
    if not (isinstance(palette, tuple) or palette in ALLOWED_CATEGORICAL_COLOR_PALLETES):
        raise ValueError(f'Invalid Palette Name/Tuple. Allowed (pre-built) Palettes: {ALLOWED_CATEGORICAL_COLOR_PALLETES}')

    # Sorted so the factor -> color assignment is stable between calls.
    categories = sorted(np.unique(self.source.data[categorical_name]).tolist())

    if not isinstance(palette, tuple):
        palette_group = getattr(palettes, palette)
        size = len(categories)
        # Named palette families only exist for certain sizes; an exact
        # lookup by category count raised KeyError for any other count.
        # Fall back to the smallest palette that still covers every
        # category, or the largest one available when none does.
        if isinstance(palette_group, dict) and size not in palette_group:
            available = sorted(palette_group)
            size = next((s for s in available if s >= size), available[-1])
        palette = palette_group[size]

    cmap = bokeh_mdl.CategoricalColorMapper(palette=palette, factors=categories, **kwargs)

    self.cmap = {'field': categorical_name, 'transform': cmap}
    return self.cmap
def output_html_graph(df, out_path, ):
    """Save a scatter plot of ``component_0`` vs ``component_1`` colored by cluster.

    :param df: DataFrame with ``dtag``, ``component_0``, ``component_1`` and
        ``cluster`` columns. NOTE: the ``cluster`` column is converted to
        strings in place on the caller's frame.
    :param out_path: Destination of the HTML file; passed through ``str()``.
    """
    # Categorical factors must be strings, so stringify the cluster ids.
    df["cluster"] = df["cluster"].apply(str)

    cds = ColumnDataSource(df)

    clusters = df['cluster'].unique()
    # Category20 is only indexed by sizes 3..20. The previous
    # ``(n % 19) + 2`` produced the non-existent key 2 whenever n was a
    # multiple of 19, and wrapped to a tiny palette for n >= 20.
    palette = d3['Category20'][min(max(len(clusters), 3), 20)]
    color_map = bmo.CategoricalColorMapper(factors=clusters, palette=palette)

    # Define tooltips
    TOOLTIPS = [
        ("dtag", "@dtag"),
        ("(component_0,component_1)", "($x, $y)"),
        ("cluster", "@cluster"),
    ]

    p = figure(plot_width=800, plot_height=800, tooltips=TOOLTIPS,
               title="Mouse over the dots")

    p.circle('component_0', 'component_1',
             color={'field': 'cluster', 'transform': color_map},
             size=10,
             source=cds)

    save(p, str(out_path), )
def clicks_v_impressions(data, click_lower_bound=5, output=False, semantic_term='PriceResearch'):
    """
    Scatter-plot paid clicks against paid impressions, colored by whether the
    row's 'Semantic Classification' contains ``semantic_term``.

    :param data: The dataframe of download.csv. NOTE: a randomly generated
        'Portfolio Classification' column is written onto this frame.
    :param click_lower_bound: Records with clicks fewer than or equal to this
        number are excluded from the plot (for performance).
    :param output: If False, assumes the function is running from a Jupyter
        notebook; if a path as a string, will write the plot as a .html file
        to the supplied path.
    :param semantic_term: Pattern looked up in 'Semantic Classification' via
        ``Series.str.contains``.
    """
    # Generate random portfolio values
    # TODO remove this after the classification package is operable
    test_portfolio_vals = np.array(['paid', 'paid | organic', 'none', 'organic'])
    data['Portfolio Classification'] = test_portfolio_vals[
        np.random.randint(0, len(test_portfolio_vals), len(data))]

    # Category10 has no entry below 3 colors; small frames can draw fewer
    # than 3 distinct random classes, which used to raise KeyError.
    palette = d3['Category10'][max(3, len(data['Portfolio Classification'].unique()))]

    # First, plot impressions vs. clicks and the tooltip will be the search
    # query itself. Copy the filtered rows so the column added below is
    # written to an independent frame rather than to a slice of the input.
    data = data[data['Clicks'] > click_lower_bound].copy()

    bool_col = data['Semantic Classification'].str.contains(semantic_term)
    booleanDictionary = {
        True: 'Contains {}'.format(semantic_term),
        False: 'Does not Contain {}'.format(semantic_term),
    }
    data['bool'] = bool_col.map(booleanDictionary)

    color_map = bmo.CategoricalColorMapper(factors=data['bool'].unique(), palette=palette)
    source = ColumnDataSource(data=data)

    TOOLTIPS = [('Search Term', '@{Search term}'), ('Cost', '@Cost')]

    if output is False:
        output_notebook()
    elif isinstance(output, str):
        output_file(output)

    p = figure(plot_width=800, plot_height=400, tooltips=TOOLTIPS)
    p.circle(x='Clicks', y='Impressions',
             source=source,
             color={'field': 'bool', 'transform': color_map},
             legend='bool')
    p.xaxis.axis_label = 'Paid Clicks'
    p.yaxis.axis_label = 'Paid Impressions'

    show(p)
def exportTFIDF_HTML(tsne_tfidf_df, filename='images/tfidf.html', char_lenght=200,
                     title="TF-IDF Clustering", plot_width=890, plot_height=600):
    """Render an interactive 2-D scatter of the frame and write it to ``filename``.

    :param tsne_tfidf_df: Frame with ``x``, ``y``, ``category`` and
        ``description`` columns. NOTE: ``description`` is truncated in place.
    :param filename: Path of the standalone HTML file to write.
    :param char_lenght: Upper slice bound applied to each description.
    :param title: Plot title.
    :param plot_width: Figure width passed to Bokeh.
    :param plot_height: Figure height passed to Bokeh.
    """
    # ---- Visualize INTERACTIVE 2-D graph using bokeh ----
    from bokeh.resources import CDN
    from bokeh.embed import file_html
    import bokeh.plotting as bp
    from bokeh.palettes import d3
    import bokeh.models as bmo
    from bokeh.models import HoverTool

    # NOTE(review): the slice starts at 1, so the first character of every
    # description is dropped - confirm this is intended and not meant to be
    # ``x[:char_lenght]``.
    tsne_tfidf_df['description'] = tsne_tfidf_df['description'].apply(
        lambda x: x[1:char_lenght])

    plot_tfidf = bp.figure(plot_width=plot_width, plot_height=plot_height,
                           title=title,
                           tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
                           x_axis_type=None, y_axis_type=None, min_border=1)

    factors = tsne_tfidf_df['category'].map(str).unique()
    # Category20 is only defined for 3..20 colors; an exact lookup by the
    # category count raised KeyError outside that range.
    palette = d3["Category20"][min(max(len(tsne_tfidf_df['category'].unique()), 3), 20)]
    color_map = bmo.CategoricalColorMapper(factors=factors, palette=palette)

    plot_tfidf.scatter(x='x', y='y',
                       color={'field': 'category', 'transform': color_map},
                       legend='category',
                       source=tsne_tfidf_df)

    hover = plot_tfidf.select(dict(type=HoverTool))
    hover.tooltips = {"description": "@description", "category": "@category"}

    html = file_html(plot_tfidf, CDN, "Plot")
    # Context manager closes the handle even if the write fails; the
    # generated page declares UTF-8, so write it as UTF-8 explicitly.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html)
def plotTSNE_DF(tsne_df, filename='tSNE_Graph.html', char_lenght=50,
                title="tSNE Clustering", plot_width=890, plot_height=501):
    """Render a t-SNE scatter colored by ``topic`` and write it to ``filename``.

    :param tsne_df: Frame with ``x``, ``y``, ``topic``, ``category`` and
        ``description`` columns. NOTE: ``description`` is shortened in place.
    :param filename: Path of the standalone HTML file to write.
    :param char_lenght: Number of whitespace-separated words (not characters)
        kept from each description.
    :param title: Plot title.
    :param plot_width: Figure width passed to Bokeh.
    :param plot_height: Figure height passed to Bokeh.
    """
    def _printtext(x, char_lenght):
        # Keep only the first ``char_lenght`` whitespace-separated tokens.
        return " ".join(x.split()[0:char_lenght])

    tsne_df['description'] = tsne_df['description'].apply(
        lambda x: _printtext(x, char_lenght))

    plot_tsne = bp.figure(plot_width=plot_width, plot_height=plot_height,
                          title=title,
                          tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
                          x_axis_type=None, y_axis_type=None, min_border=1)

    # Two 20-color palettes back to back give 40 distinct colors.
    palette = d3['Category20'][20] + d3['Category20b'][20]
    color_map = bmo.CategoricalColorMapper(factors=tsne_df['topic'].unique(),
                                           palette=palette)

    plot_tsne.scatter('x', 'y',
                      source=tsne_df,
                      color={'field': 'topic', 'transform': color_map},
                      legend='topic')

    hover = plot_tsne.select(dict(type=HoverTool))
    hover.tooltips = {
        "description": "@description",
        "topic": "@topic",
        "category": "@category",
        "username": "******",
    }

    html = file_html(plot_tsne, CDN, "Plot")
    # Context manager closes the handle even if the write fails; the
    # generated page declares UTF-8, so write it as UTF-8 explicitly.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html)
def make_color_mapper(y_values, y_type, formatting):
    """
    Generates color mapper which takes in values and outputs the color hexcode.

    :param (pd.Series) y_values: pandas Series to be plotted, for calculating min/max
    :param (str) y_type: 'sequential', 'divergent', or 'categorical' -- for palette
    :param (dict) formatting: see DEFAULTFORMAT from params.py. Keys read here:
        'palette', 'ncolors', 'reverse_palette', and for non-categorical types
        'min', 'max', 'low_color', 'high_color', 'lin_or_log'.
    :return: Bokeh colormapper object
    """
    try:
        palette = PALETTES[y_type][formatting['palette']][formatting['ncolors']].copy()
    except KeyError:  # palette is not in default list
        palette = get_palette_colors(formatting['palette'], formatting['ncolors']).copy()
    except TypeError:  # formatting['palette'] is a list (unhashable as a key)
        palette = formatting['palette'].copy()

    # Every branch above yields a copy, so reversing never touches the
    # shared default palettes or the caller's list.
    if formatting['reverse_palette']:
        palette.reverse()

    if y_type in ['sequential', 'divergent']:
        # Explicit numeric bounds win; anything else falls back to the data.
        c_min = formatting['min'] if isinstance(formatting['min'], (int, float)) else min(y_values)
        c_max = formatting['max'] if isinstance(formatting['max'], (int, float)) else max(y_values)

        below_color = formatting['low_color'] if isinstance(formatting['low_color'], str) else None
        # Validate 'high_color' itself. This used to test 'low_color', so
        # high_color was dropped when low_color was unset, and a non-string
        # high_color was passed through when low_color was a string.
        above_color = formatting['high_color'] if isinstance(formatting['high_color'], str) else None

        mapper_fx = {
            'lin': models.LinearColorMapper,
            'log': models.LogColorMapper,
        }
        mapper = mapper_fx[formatting['lin_or_log']](palette=palette,
                                                     low=c_min,
                                                     high=c_max,
                                                     low_color=below_color,
                                                     high_color=above_color)
    else:
        mapper = models.CategoricalColorMapper(factors=y_values.unique(),
                                               palette=palette)

    return mapper
def exportKmeansDF(kmeans_df, filename='KMeansGraph.html', char_lenght=200,
                   title="KMeans clustering", plot_width=890, plot_height=600):
    """Render a KMeans scatter colored by ``cluster`` and write it to ``filename``.

    :param kmeans_df: Frame with ``x``, ``y``, ``cluster``, ``category`` and
        ``description`` columns.
    :param filename: Path of the standalone HTML file to write.
    :param char_lenght: Unused here; kept for signature compatibility.
    :param title: Plot title.
    :param plot_width: Figure width passed to Bokeh.
    :param plot_height: Figure height passed to Bokeh.
    """
    import bokeh.plotting as bp
    from bokeh.palettes import d3
    import bokeh.models as bmo
    from bokeh.models import HoverTool
    from bokeh.embed import file_html
    from bokeh.resources import CDN

    plot_kmeans = bp.figure(plot_width=plot_width, plot_height=plot_height,
                            title=title,
                            tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
                            x_axis_type=None, y_axis_type=None, min_border=1)

    # Two 20-color palettes back to back give 40 distinct colors.
    palette = d3['Category20'][20] + d3['Category20b'][20]
    color_map = bmo.CategoricalColorMapper(factors=kmeans_df['cluster'].unique(),
                                           palette=palette)

    plot_kmeans.scatter('x', 'y',
                        source=kmeans_df,
                        color={'field': 'cluster', 'transform': color_map},
                        legend='cluster')

    hover = plot_kmeans.select(dict(type=HoverTool))
    hover.tooltips = {
        "description": "@description",
        "cluster": "@cluster",
        "category": "@category",
    }

    html = file_html(plot_kmeans, CDN, "Plot")
    # Context manager closes the handle even if the write fails; the
    # generated page declares UTF-8, so write it as UTF-8 explicitly.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html)
def exportKmeansDF(kmeans_df, filename='KMeansGraph.html', char_lenght=200,
                   title="KMeans clustering", plot_width=890, plot_height=600):
    """Render a KMeans scatter colored by ``cluster`` and save it to ``filename``.

    :param kmeans_df: Frame with ``x``, ``y``, ``cluster`` and
        ``descripcion_del_proceso`` columns.
    :param filename: Output HTML path handed to ``output_file``.
    :param char_lenght: Unused here; kept for signature compatibility.
    :param title: Title of both the plot and the HTML document.
    :param plot_width: Figure width passed to Bokeh.
    :param plot_height: Figure height passed to Bokeh.
    """
    plot_kmeans = bp.figure(plot_width=plot_width, plot_height=plot_height,
                            title=title,
                            tools="pan,wheel_zoom,box_zoom,reset,hover",
                            x_axis_type=None, y_axis_type=None, min_border=1)

    # Two 12-color palettes back to back give 24 colors.
    palette = d3['Category20'][12] + d3['Category20b'][12]
    color_map = bmo.CategoricalColorMapper(factors=kmeans_df['cluster'].unique(),
                                           palette=palette)

    plot_kmeans.scatter('x', 'y',
                        source=kmeans_df,
                        color={'field': 'cluster', 'transform': color_map},
                        legend='cluster')

    hover = plot_kmeans.select(dict(type=HoverTool))
    hover.tooltips = {"description": "@descripcion_del_proceso", "cluster": "@cluster"}

    # The document is written by output_file/save below. The earlier
    # ``file_html(...)`` call produced a string that was never used, so
    # that redundant render has been dropped.
    output_file(filename, title=title, mode='inline', root_dir=None)
    save(plot_kmeans)
'new_vaccinations_smoothed', 'total_vaccinations_per_hundred', 'people_vaccinated_per_hundred', 'people_fully_vaccinated_per_hundred', 'new_vaccinations_smoothed_per_million', 'stringency_index', 'population', 'population_density', 'median_age', 'aged_65_older', 'aged_70_older', 'gdp_per_capita', 'extreme_poverty', 'cardiovasc_death_rate', 'diabetes_prevalence', 'female_smokers', 'male_smokers', 'handwashing_facilities', 'hospital_beds_per_thousand', 'life_expectancy', 'human_development_index' ], aggfunc=np.mean) #covid_pivoted_whole= covid_pivoted.merge(continent, on='iso_code') covid_pivoted['continent'] = [index[1] for index in covid_pivoted.index] source = bpl.ColumnDataSource(covid_pivoted) # use whatever palette you want... palette = d3['Category10'][len(covid_pivoted['continent'].unique())] color_map = bmo.CategoricalColorMapper( factors=covid_pivoted['continent'].unique(), palette=palette) # create figure and plot p = bpl.figure() p.scatter(x='total_tests_per_thousand', y='total_cases_per_million', color={ 'field': 'continent', 'transform': color_map }, legend_label='continent', source=source) boi.output_file('allatok.html') bpl.show(p)
# NOTE(review): `source` is built before `since` is rewritten below, so the
# plotted data presumably keeps the original `since` values - confirm that
# the "starting year" tooltip is meant to show the untransformed column.
source = ColumnDataSource(plot_df)
plot_df['since'] = [2019 - ele for ele in plot_df.since]

# tooltips for plot
TOOLTIPS = [
    ("#species", "@n_species"),
    ("bird common", "@species_common"),
    ("travel distance", "@median_travel_distance"),
    ("#checklists", "@n_checklists"),
    ("group size", "@mean_group_size"),
    ("starting year", "@since"),
    ("median_start", "@median_start"),
    ("percent_travel", "@percent_travel"),
]

# plot users
# Category20 only exists for 3..20 colors. The lower bound was already
# guarded; the upper bound is clamped too so that more than 20 labels no
# longer raises KeyError.
palette = d3['Category20'][min(max(3, len(plot_df['label'].unique())), 20)]
color_map = bmo.CategoricalColorMapper(factors=plot_df['label'].unique(),
                                       palette=palette)

# create figure and plot
birders_plot = figure(title=' ', tooltips=TOOLTIPS)
birders_plot.circle('x', 'y',
                    color='black',
                    fill_color={'field': 'label', 'transform': color_map},
                    size=6, alpha=0.2, fill_alpha=0.6,
                    source=source)
# Hide the Bokeh logo and the toolbar entirely.
birders_plot.toolbar.logo = None
birders_plot.toolbar_location = None

# save output
output_file('users_NY_2005-2019.html')
show(birders_plot)
plt.margins(0.02)

# --- Best-fit line: regress points per game on minutes per game ---
from sklearn.linear_model import LinearRegression

# Evenly spaced MP/G values spanning the observed range, shaped as a
# single-feature column for the estimator.
line_length = np.linspace(
    min(restrict["MP/G"]),
    max(restrict["MP/G"]),
).reshape(-1, 1)

line_model = LinearRegression()
line_model.fit(restrict[["MP/G"]], restrict[["PTS/G"]])
bfline = line_model.predict(line_length)

# --- Interactive scatter ---
import bokeh.plotting as bp
import bokeh.models as bm
import bokeh.io as bi

source = bp.ColumnDataSource(restrict)

# One color per court position; `color` is supplied by earlier code.
color_scat = bm.CategoricalColorMapper(
    factors=["PG", "SG", "SF", "PF", "C"],
    palette=color,
)

hover = bm.HoverTool(
    tooltips=[
        ("Player", "@Player"),
        ("Position", "@Pos"),
        ("Tm", "@Tm"),
    ],
)

plot = bp.figure(
    title="2017-2018 NBA Season",
    title_location="above",
    x_axis_label="Minutes Played",
    y_axis_label="Points Scored",
    tools=[hover, "pan", "wheel_zoom"],
)

plot.circle(
    x="MP/G",
    y="PTS/G",
    source=source,
    color={"field": "Pos", "transform": color_scat},
)
# Best-fit overlay is currently disabled:
#plot.line([line_length], [bfline], line_color = "black", line_width=3)
def visualization(tsne_lda, _lda_keys, descriptors, divergent_keys,
                  divergent_topics, labels, outdir, model_num):
    """Plot the 2-D embedding colored by topic and label each divergent topic.

    Every point from ``tsne_lda`` (columns 0 and 1 are x and y) is drawn with
    a color chosen by its stringified entry in ``divergent_keys``. For each
    entry of ``divergent_topics`` the mean position of the points whose
    ``_lda_keys`` value equals it is computed, and the topic is written near
    that spot. The figure is written to
    ``outdir + "visualization_<model_num>_topics.html"`` and then shown.
    """
    scatter_frame = pd.DataFrame({
        "descriptors": descriptors,
        "topic": [str(key) for key in divergent_keys],
        "x": tsne_lda[:, 0],
        "y": tsne_lda[:, 1],
    })

    # Separate frame keyed by the raw topic ids, used only for centroids.
    centroid_frame = pd.DataFrame()
    centroid_frame['x'] = tsne_lda[:, 0]
    centroid_frame['y'] = tsne_lda[:, 1]
    centroid_frame['topics'] = _lda_keys

    centroids = []
    for topic in divergent_topics:
        members = centroid_frame[centroid_frame['topics'] == topic]
        centroids.append([
            np.average(members['x'].tolist()),
            np.average(members['y'].tolist()),
        ])

    source = bpl.ColumnDataSource(scatter_frame)

    unique_labels = list(set(labels))
    # Cycle through the base palette so any number of labels gets a color;
    # the trailing 'gray' pairs with the extra 'others' factor.
    label_colors = list(itertools.islice(itertools.cycle(palette_dark25),
                                         len(unique_labels)))
    color_map = bmo.CategoricalColorMapper(
        factors=[str(label) for label in unique_labels] + ['others'],
        palette=label_colors + ['gray'])

    TOOLS = "hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select,poly_select,lasso_select,"

    p = bpl.figure(plot_width=2000, plot_height=2000,
                   # title=title,
                   tools=TOOLS,
                   x_axis_type=None, y_axis_type=None, min_border=1)

    p.scatter(x='x', y='y',
              color={'field': 'topic', 'transform': color_map},
              legend='topic',
              source=source,
              fill_alpha=0.3,
              radius=0.2)

    hover = p.select(dict(type=HoverTool))
    hover.tooltips = """<font size='5pt'>Corpus: @topic <br /> Descriptors: @descriptors</font>"""

    # Shift each label 5 units to the left of its centroid.
    for (center_x, center_y), topic in zip(centroids, divergent_topics):
        p.text(center_x - 5, center_y, [topic],
               text_color="black",
               text_font_size='50pt',
               text_font_style='bold')

    bpl.output_file(outdir + "%s_%d_topics.html" % ('visualization', model_num))
    bpl.show(p)
df_nans.to_dict('list') ) ## Bokeh worflow # output_notebook() # # Figures and colormaps # p_answered = figure(tools=TOOLS) p_nanswered = figure(tools=TOOLS) ans_cmap = bmo.CategoricalColorMapper( factors=df_ans.cluster.unique(), palette=all_palettes['Set1'][len(df_ans.cluster.unique())] ) nans_cmap = bmo.CategoricalColorMapper( factors=df_nans.cluster.unique(), palette=all_palettes['Set1'][len(df_nans.cluster.unique())] ) # this changes the order with which the colors are applied # # Visualizations # p_answered.scatter( x='x', y='y', source=src_ans,
## Bokeh worflow # output_notebook() # # Figures and colormaps # p_answered = figure(tools=TOOLS) p_nanswered = figure(tools=TOOLS) # palettes assume 3 if number of clusters less than it color_palettes = lambda x: all_palettes['Set1'][3] if (len(x) < 3) else all_palettes['Set1'][len(x)] ans_cmap = bmo.CategoricalColorMapper( factors=df_ans.kmeans_cluster.unique(), palette=color_palettes(df_ans.kmeans_cluster.unique()) ) nans_cmap = bmo.CategoricalColorMapper( factors=df_nans.kmeans_cluster.unique(), palette=color_palettes(df_nans.kmeans_cluster.unique()) ) # this changes the order with which the colors are applied # # Visualizations # p_answered.scatter( x='x', y='y', source=src_ans,
# Title carries the covered time window and the number of articles.
plot_title = "KMeans Clustering of the News: %s - %s (%s articles)" % (
    title_format_datetime(from_datetime),
    title_format_datetime(to_datetime),
    len(data.index),
)

plot_kmeans = bp.figure(
    title=plot_title,
    plot_width=1600,
    plot_height=1200,
    tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
    x_axis_type=None,
    y_axis_type=None,
    min_border=1,
)

# Chain several palettes so that up to 80 clusters get their own color.
palette = d3['Category20'][20]
for extra_colors in (d3['Category20b'][20],
                     d3['Category20c'][20],
                     brewer['BrBG'][9],
                     brewer['RdYlBu'][11]):
    palette = palette + extra_colors

color_map = bmo.CategoricalColorMapper(
    factors=kmeans_df['cluster'].unique(),
    palette=palette,
)

plot_kmeans.scatter(
    'x', 'y',
    source=kmeans_df,
    color=dict(field='cluster', transform=color_map),
)

hover = plot_kmeans.select(dict(type=HoverTool))
hover.tooltips = dict(
    Category="@category",
    Cluster="@Desc",
    Description="@description",
)
def graphVectorSpace(tfidfVectors, extraColumns, dateForTitle, storyMap, threshold):
    """Project TF-IDF vectors to 2-D and show them colored by story.

    The vectors are reduced to 50 dimensions with TruncatedSVD and then to 2
    with t-SNE. Every article starts in category 'NA'; articles are then
    assigned to stories in one of two ways.

    :param tfidfVectors: TF-IDF matrix, one row per article.
    :param extraColumns: Frame providing 'publication', 'id' and
        'content no nonascii' for the same articles.
    :param dateForTitle: Sequence whose first element is appended to the title.
    :param storyMap: Mapping of story -> list of article ids.
    :param threshold: If None, articles are assigned to stories straight from
        ``storyMap``. Otherwise each story's first article is scored against
        the corpus and every article scoring >= threshold joins that story.
    """
    # Better results seem to be obtained by breaking the dimensionality
    # reduction into two steps. First reduce to fifty dimensions with SVD.
    from sklearn.decomposition import TruncatedSVD
    svd = TruncatedSVD(n_components=50, random_state=0)
    svdResults = svd.fit_transform(tfidfVectors)

    # Next continue to two dimensions with TSNE
    from sklearn.manifold import TSNE
    tsneModel = TSNE(n_components=2, verbose=1, random_state=0, n_iter=500)
    tsneResults = tsneModel.fit_transform(svdResults)

    tfidf2dDataFrame = pd.DataFrame(tsneResults)
    tfidf2dDataFrame.columns = ['x', 'y']
    tfidf2dDataFrame['publication'] = extraColumns['publication']
    tfidf2dDataFrame['id'] = extraColumns['id']
    tfidf2dDataFrame['content'] = extraColumns['content no nonascii'].map(
        lambda x: x[:200])

    # All articles will be marked as NA to indicate that they have not been
    # assigned to a story. Those which have been assigned one are updated.
    tfidf2dDataFrame['category'] = 'NA'

    # If the threshold is not provided, then just graph the vector space as
    # is, with colours indicating desired story grouping. This still has
    # value because it shows how well stories cluster together.
    if threshold is None:
        graphTitle = ("TF-IDF article clustering - story assignment from map - "
                      + dateForTitle[0])
        for story, storyArticles in storyMap.items():
            for article in storyArticles:
                matches = tfidf2dDataFrame.index[tfidf2dDataFrame['id'] == article]
                # Only unambiguous ids are assigned.
                if len(matches) == 1:
                    # Single .loc write instead of chained
                    # ``df['category'][i] = ...``, which may write to a
                    # temporary copy and is rejected under copy-on-write.
                    tfidf2dDataFrame.loc[matches[0], 'category'] = story
    else:
        graphTitle = ("TF-IDF article clustering - story assignment computed - "
                      + dateForTitle[0])
        nonZeroCoords = initialiseAllNonZeroCoords(tfidfVectors)
        for story, storyArticles in storyMap.items():
            leadArticleIndex = extraColumns[extraColumns['id'] ==
                                            storyArticles[0]].index[0]
            # Compute score of all articles in corpus relative to first
            # article in story (.product)
            scores = productRelatednessScores(tfidfVectors, nonZeroCoords,
                                              leadArticleIndex)
            rankedIndices = np.argsort(scores)
            for article in rankedIndices:
                if scores[article] >= threshold:
                    tfidf2dDataFrame.loc[article, 'category'] = story

    import bokeh.plotting as bp
    from bokeh.models import HoverTool
    from bokeh.plotting import show
    from bokeh.palettes import d3
    import bokeh.models as bmo

    plot_tfidf = bp.figure(plot_width=700, plot_height=600,
                           title=graphTitle,
                           tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
                           x_axis_type=None, y_axis_type=None, min_border=1)

    numCats = len(tfidf2dDataFrame['category'].unique())
    # Category20 is only defined for 3..20 colors; an exact lookup raised
    # KeyError for fewer than 3 or more than 20 categories.
    palette = d3['Category20'][min(max(numCats, 3), 20)]
    color_map = bmo.CategoricalColorMapper(
        factors=tfidf2dDataFrame['category'].map(str).unique(),
        palette=palette)

    plot_tfidf.scatter(x='x', y='y',
                       color={'field': 'category', 'transform': color_map},
                       legend='category',
                       source=tfidf2dDataFrame)

    hover = plot_tfidf.select(dict(type=HoverTool))
    plot_tfidf.legend.click_policy = "hide"
    hover.tooltips = {
        "id": "@id",
        "publication": "@publication",
        "content": "@content",
        "category": "@category",
    }

    show(plot_tfidf)
from bokeh.plotting import figure, show, output_notebook, reset_output
from bokeh.palettes import d3
import bokeh.models as bmo
from bokeh.io import save, output_file

# Render inline in the notebook.
output_notebook()

plot_tfidf = bp.figure(plot_width=700, plot_height=600,
                       title="TF-IDF Clustering of the reviews",
                       tools="pan,wheel_zoom,box_zoom,reset,hover,save",
                       x_axis_type=None, y_axis_type=None, min_border=1)

# Category10 is only defined for 3..10 colors. The previous `n + 1` lookup
# raised KeyError for a single category and for ten or more; clamp instead.
# Smaller d3 palettes are prefixes of the larger ones, so counts that
# worked before keep the same colors.
palette = d3['Category10'][min(max(len(tsne_tfidf_df['category'].unique()), 3), 10)]
color_map = bmo.CategoricalColorMapper(
    factors=tsne_tfidf_df['category'].map(str).unique(),
    palette=palette)

plot_tfidf.scatter(x='x', y='y',
                   color={'field': 'category', 'transform': color_map},
                   legend_group='category',
                   source=tsne_tfidf_df)

hover = plot_tfidf.select(dict(type=HoverTool))
hover.tooltips = {"description": "@description", "category": "@category"}

show(plot_tfidf)

# In the bokeh plot, each cluster is determined by the variety of unique words commonly used in the reviews. Reviews that are clustered together tend to contain the same unique words and hence, are grouped together.