def plot_histogram(plt_title, df, ctrl):
    """Build a histogram of the positive results of one parameter.

    Args:
        plt_title: Title shown above the chart.
        df: DataFrame providing at least the columns ``PARM`` and ``RESULT``.
        ctrl: Settings mapping. The keys read here are ``ypar`` (parameter
            to plot), ``min_x`` / ``max_x`` (x-axis limits; when they are
            equal the scale domain is left to Altair) and ``bin_size``
            (bin width; values <= 0 leave the binning to Altair).

    Returns:
        A two-element list ``[chart, plotted_df]`` where ``plotted_df`` holds
        only the ``RESULT`` column of the rows that were kept.
    """
    x_lab = get_label(ctrl['ypar'])

    # Keep only rows of the requested parameter with a strictly positive result.
    selected = (df['PARM'] == ctrl['ypar']) & (df['RESULT'] > 0)
    df = df[selected][['RESULT']]

    # Identical limits mean "no explicit domain".
    if ctrl['max_x'] == ctrl['min_x']:
        scx = alt.Scale()
    else:
        scx = alt.Scale(domain=(ctrl['min_x'], ctrl['max_x']))

    # Use the bin width if the user defined one.
    if ctrl['bin_size'] > 0:
        bin_def = alt.Bin(step=ctrl['bin_size'])
    else:
        bin_def = alt.Bin()

    chart = alt.Chart(df, title=plt_title).mark_bar().encode(
        alt.X("RESULT:Q", bin=bin_def, title=x_lab, scale=scx),
        y='count()',
    )
    return [chart, df]
def heatmap(
    data=None,
    x=0,
    y=1,
    color=2,
    opacity=None,
    aggregate="average",
    height=600,
    width=800,
):
    """Return a binned rectangle heatmap of ``data``.

    ``data`` is first passed through ``maxbins``, which also supplies the
    maximum number of bins for the x and y channels. Each cell is coloured
    by ``aggregate(color)``; when ``color`` is ``None`` the cell's record
    count is used instead. An ``opacity`` encoding is added only when
    ``opacity`` is given, in which case the dark hue scale is used.

    NOTE(review): the shorthand is built by string concatenation, so
    ``color`` must be a string (or ``None``); the integer default would
    fail there -- confirm how callers are expected to pass it.
    """
    data, nx, ny = maxbins(data)

    if color is None:
        # No colour field: fall back to counting the records per cell.
        color = ""
        aggregate = "count"

    if opacity is not None:
        scale = hue_scale_dark
        extra_encodings = dict(opacity=opacity)
    else:
        scale = hue_scale_light
        extra_encodings = dict()

    shorthand = aggregate + "(" + color + "):Q"
    color = alt.Color(shorthand, scale=scale)

    chart = alt.Chart(data, height=height, width=width).mark_rect()
    return chart.encode(
        x=alt.X(x, bin=alt.Bin(maxbins=nx)),
        y=alt.Y(y, bin=alt.Bin(maxbins=ny)),
        color=color,
        **extra_encodings,
    )
def violinplot(x=None, y=None, data=None, orient=None):
    """Return a violin-style chart made of centred, binned count areas.

    With ``orient`` of ``None`` or ``'v'`` the quantitative field ``y`` is
    binned on the vertical axis and one column per ``x`` category is drawn.
    Any other ``orient`` value swaps the roles: ``x`` is binned on the
    horizontal axis and one row per ``y`` category is drawn.
    """
    # TODO: automatically infer orientation
    is_vertical = orient is None or orient == 'v'

    if is_vertical:
        encodings = {
            'x': alt.X('count(*):Q',
                       axis=alt.Axis(grid=False, labels=False),
                       stack='center',
                       title=''),
            'y': alt.Y(f'{y}:Q', bin=alt.Bin(maxbins=100)),
            'column': f'{x}:N',
            'color': f'{x}:N',
        }
    else:
        encodings = {
            'y': alt.Y('count(*):Q',
                       axis=alt.Axis(grid=False, labels=False),
                       stack='center',
                       title=''),
            'x': alt.X(f'{x}:Q', bin=alt.Bin(maxbins=100)),
            'row': f'{y}:N',
            'color': f'{y}:N',
        }

    return alt.Chart(data).mark_area().encode(**encodings)
def get_histogram_with_scatterplot(data, x_variable, y_variable):
    """Return a scatter plot with marginal histograms on the top and right.

    Points are coloured by the ``species`` field and sized by
    ``y_variable``. Both marginal histograms are unstacked step areas with
    20 bins at most, coloured by ``species``.
    """
    base = alt.Chart(data)
    step_area = {'opacity': .3, 'interpolate': 'step'}
    bins = alt.Bin(maxbins=20)

    # Central scatter plot.
    points = base.mark_circle().encode(
        alt.X(x_variable),
        alt.Y(y_variable),
        color='species',
        size=y_variable,
    )

    # Histogram of the x variable, placed above the scatter plot.
    top_hist = base.mark_area(**step_area).encode(
        alt.X(x_variable + ':Q', bin=bins, stack=None, title=''),
        alt.Y('count()', stack=None, title=''),
        alt.Color('species:N'),
    )
    top_hist = top_hist.properties(height=200)

    # Histogram of the y variable, placed to the right of the scatter plot.
    right_hist = base.mark_area(**step_area).encode(
        alt.Y(y_variable + ':Q', bin=bins, stack=None, title=''),
        alt.X('count()', stack=None, title=''),
        alt.Color('species:N'),
    )
    right_hist = right_hist.properties(width=200)

    return top_hist & (points | right_hist)
def plot_heatmap(df, x_name='price'):
    """Plot a heatmap of the average value of wines per grape variety.

    The x axis is binned either by price or by rating, the y axis lists the
    grape varieties and the colour shows the average scaled value.

    Parameters:
    -----------
    df -- (pandas DataFrame) Cleaned data in a dataframe.
    x_name -- (str) Field to bin on the x axis: 'price' (rows with a price
        below 50 only) or 'points'.

    Returns an Altair chart object.

    Raises ValueError if ``x_name`` is neither 'price' nor 'points'.
    """
    # Per-axis settings: (x-axis title, suffix of the chart title).
    axis_settings = {
        'price': ("Price ($)", "by Price"),
        'points': ("Rating", "by Rating"),
    }
    if x_name not in ('price', 'points'):
        raise ValueError(
            "x_name must be 'price' or 'points', got {!r}".format(x_name))
    x_title, title_suffix = axis_settings[x_name]

    # register the custom theme under a chosen name
    alt.themes.register('vino_special', vino_special)
    # enable the newly registered theme
    alt.themes.enable('vino_special')

    varieties_chart_data = wrangle_varieties(df)
    if x_name == 'price':
        # The price view is restricted to wines cheaper than 50.
        varieties_chart_data = varieties_chart_data.query('price < 50')

    return alt.Chart(varieties_chart_data).mark_rect().encode(
        x=alt.X(x_name + ':Q', bin=alt.Bin(maxbins=10), title=x_title),
        y=alt.Y('variety:O', title="Grape Variety"),
        color=alt.Color('average(value_scaled):Q',
                        scale=alt.Scale(scheme="bluepurple"),
                        legend=alt.Legend(orient='right',
                                          title="Average Value")),
        tooltip=[
            alt.Tooltip('average(points):Q', format='.2f'),
            alt.Tooltip('average(price)', format='$.2f'),
            alt.Tooltip('average(value_scaled)', format='.2f'),
            alt.Tooltip('count(title)'),
        ],
    ).properties(
        title="Average Value Scores for Popular Grape Varieties, "
              + title_suffix
    ).configure_axis(grid=False, labelAngle=0)
def plot_model(X, y, model, predict_proba=False):
    """Plot a model's predictions over a mesh spanning the first two features.

    Args:
        X: DataFrame of features; its first two columns span the mesh.
        y: DataFrame whose first column holds the target used to colour the
            scatter points.
        model: Fitted estimator exposing ``predict`` (and ``predict_proba``
            when ``predict_proba`` is true).
        predict_proba: When true, colour the mesh by the second column of
            ``model.predict_proba`` and draw no scatter layer. Otherwise
            colour the mesh by ``model.predict`` and overlay the samples.

    Returns:
        A layered Altair chart with enlarged axis and legend fonts.
    """
    # Join data for plotting
    sample = X.join(y)

    # Mesh bounds come from the features only. ``.iloc`` is used because
    # integer keys on a label-indexed Series are deprecated as positional
    # lookups in pandas.
    lower = X.min()
    upper = X.max()
    step = (upper - lower) / 50
    step_1, step_2 = step.iloc[0], step.iloc[1]

    # Create a mesh padded by one step on every side.
    x1, x2 = np.meshgrid(
        np.arange(lower.iloc[0] - step_1, upper.iloc[0] + step_1, step_1),
        np.arange(lower.iloc[1] - step_2, upper.iloc[1] + step_2, step_2))

    # Store mesh in dataframe
    mesh_df = pd.DataFrame(np.c_[x1.ravel(), x2.ravel()],
                           columns=['x1', 'x2'])

    if predict_proba:
        mesh_df['predictions'] = model.predict_proba(
            mesh_df[['x1', 'x2']])[:, 1]
        base_plot = alt.Chart(mesh_df).mark_rect(opacity=0.5).encode(
            x=alt.X('x1', bin=alt.Bin(step=step_1),
                    axis=alt.Axis(title=X.columns[0])),
            y=alt.Y('x2', bin=alt.Bin(step=step_2),
                    axis=alt.Axis(title=X.columns[1])),
            color=alt.Color('predictions', title='P(red)',
                            scale=alt.Scale(scheme='blueorange'))
        ).properties(width=400, height=400)
        layers = [base_plot]
    else:
        mesh_df['predictions'] = model.predict(mesh_df[['x1', 'x2']])
        scat_plot = alt.Chart(sample).mark_circle(
            stroke='black', opacity=1, strokeWidth=1.5, size=100
        ).encode(
            x=alt.X(X.columns[0],
                    axis=alt.Axis(labels=True, ticks=True,
                                  title=X.columns[0])),
            y=alt.Y(X.columns[1],
                    axis=alt.Axis(labels=True, ticks=True,
                                  title=X.columns[1])),
            color=alt.Color(y.columns[0])
        )
        base_plot = alt.Chart(mesh_df).mark_rect(opacity=0.5).encode(
            x=alt.X('x1', bin=alt.Bin(step=step_1)),
            y=alt.Y('x2', bin=alt.Bin(step=step_2)),
            color=alt.Color('predictions', title='Legend')
        ).properties(width=400, height=400)
        layers = [base_plot, scat_plot]

    return alt.layer(*layers).configure_axis(
        labelFontSize=20, titleFontSize=20
    ).configure_legend(
        titleFontSize=20, labelFontSize=20
    )
def initialize_chart(self):
    """Build the pre-binned bar chart and record equivalent Altair code.

    The measure attribute (looked up on ``self.vis``) is placed on the
    channel named by ``measure.channel`` and "Number of Records" on the
    other one. Source text that recreates the chart is appended to
    ``self.code``.

    Returns:
        The Altair chart.

    NOTE(review): ``chart`` is only assigned for channels "x" and "y"; any
    other channel reaches ``return chart`` with the name unbound -- confirm
    that cannot happen.
    """
    self.tooltip = False
    measure = self.vis.get_attr_by_data_model("measure", exclude_record=True)[0]
    msr_attr = self.vis.get_attr_by_channel(measure.channel)[0]
    x_min = self.vis.min_max[msr_attr.attribute][0]
    x_max = self.vis.min_max[msr_attr.attribute][1]

    # Bar size is the ratio of the observed data range to the plotted range,
    # scaled by 12.
    # NOTE(review): plot_range is 0 when x_min == x_max, which makes the
    # division below fail -- confirm upstream guarantees a non-empty range.
    x_range = abs(
        max(self.vis.data[msr_attr.attribute]) - min(self.vis.data[msr_attr.attribute]))
    plot_range = abs(x_max - x_min)
    markbar = x_range / plot_range * 12

    if measure.channel == "x":
        chart = (alt.Chart(self.data).mark_bar(size=markbar).encode(
            alt.X(
                msr_attr.attribute,
                title=f"{msr_attr.attribute} (binned)",
                bin=alt.Bin(binned=True),
                type=msr_attr.data_type,
                axis=alt.Axis(labelOverlap=True),
                scale=alt.Scale(domain=(x_min, x_max)),
            ),
            alt.Y("Number of Records", type="quantitative"),
        ))
    elif measure.channel == "y":
        # NOTE(review): unlike the "x" branch, no ``type`` is passed here,
        # while the emitted code string below does include one -- confirm
        # which is intended.
        chart = (alt.Chart(self.data).mark_bar(size=markbar).encode(
            x=alt.X("Number of Records", type="quantitative"),
            y=alt.Y(
                msr_attr.attribute,
                title=f"{msr_attr.attribute} (binned)",
                bin=alt.Bin(binned=True),
                axis=alt.Axis(labelOverlap=True),
                scale=alt.Scale(domain=(x_min, x_max)),
            ),
        ))

    #####################################
    ## Constructing Altair Code String ##
    #####################################
    self.code += "import altair as alt\n"
    # self.code += f"visData = pd.DataFrame({str(self.data.to_dict(orient='records'))})\n"
    self.code += f"visData = pd.DataFrame({str(self.data.to_dict())})\n"
    # The text appended below is emitted verbatim; its whitespace is part of
    # the output.
    if measure.channel == "x":
        self.code += f""" chart = alt.Chart(visData).mark_bar(size={markbar}).encode( alt.X('{msr_attr.attribute}', title='{msr_attr.attribute} (binned)',bin=alt.Bin(binned=True), type='{msr_attr.data_type}', axis=alt.Axis(labelOverlap=True), scale=alt.Scale(domain=({x_min}, {x_max}))), alt.Y("Number of Records", type="quantitative") ) """
    elif measure.channel == "y":
        self.code += f""" chart = alt.Chart(visData).mark_bar(size={markbar}).encode( alt.Y('{msr_attr.attribute}', title='{msr_attr.attribute} (binned)',bin=alt.Bin(binned=True), type='{msr_attr.data_type}', 
axis=alt.Axis(labelOverlap=True), scale=alt.Scale(domain=({x_min}, {x_max}))), alt.X("Number of Records", type="quantitative") ) """
    return chart
def test_df_hexbin_C():
    """hexbin with ``C`` yields a binned rect chart coloured by mean of C."""
    gridsize = 10
    values = range(10)
    df = pd.DataFrame({"x": values, "y": values, "C": values})

    plot = df.vgplot.hexbin(x="x", y="y", C="C", gridsize=gridsize)

    assert plot.mark == "rect"
    utils.check_encodings(plot, x="x", y="y", color="C")

    # Both positional channels are binned with ``gridsize`` as the maximum.
    expected_bin = alt.Bin(maxbins=gridsize)
    assert plot["encoding"]["x"]["bin"] == expected_bin
    assert plot["encoding"]["y"]["bin"] == expected_bin

    assert plot["encoding"]["color"]["aggregate"] == "mean"
def plot_heat(selected_state, axis, price_value, points_value):
    """Render a variety heatmap for the selected states as HTML.

    Reads the module-level DataFrame ``df``.

    Args:
        selected_state: A state name, a list of state names, or the
            placeholder ``'select your state'`` meaning "no state filter".
        axis: ``'price'`` or ``'points'``; the field that is binned on the
            x axis and averaged for the colour. The price view only keeps
            rows with a price below 100.
        price_value: Iterable of prices; its min and max bound the price
            filter (inclusive).
        points_value: Iterable of ratings; its min and max bound the points
            filter (inclusive).

    Returns:
        The chart serialised with ``to_html()``.

    Raises:
        ValueError: If ``axis`` is neither ``'price'`` nor ``'points'``.
    """
    # Per-axis text: (x-axis title, legend title, chart title).
    labels = {
        'price': ("Price($)", "Average price",
                  "Average price for Popular Grape Varieties"),
        'points': ("Rating Score", "Average rating",
                   "Average rating for Popular Grape Varieties"),
    }
    if axis not in ('price', 'points'):
        raise ValueError(
            "axis must be 'price' or 'points', got {!r}".format(axis))
    x_title, legend_title, chart_title = labels[axis]

    # State filter.
    if selected_state == 'select your state':
        df_filtered = df
    elif isinstance(selected_state, list):
        df_filtered = df[df['state'].isin(selected_state)]
    else:
        df_filtered = df[df['state'] == selected_state]

    # Price and rating range filters (both ends inclusive).
    df_filtered = df_filtered[
        (df_filtered['price'] >= min(price_value))
        & (df_filtered['price'] <= max(price_value))]
    df_filtered = df_filtered[
        (df_filtered['points'] >= min(points_value))
        & (df_filtered['points'] <= max(points_value))]

    if axis == 'price':
        df_filtered = df_filtered.query('price < 100')

    heatmap = alt.Chart(df_filtered).mark_rect().encode(
        x=alt.X(axis + ':Q', bin=alt.Bin(maxbins=10), title=x_title),
        y=alt.Y('variety:O', title="Wine Variety"),
        color=alt.Color('average(' + axis + '):Q',
                        scale=alt.Scale(scheme="bluepurple"),
                        legend=alt.Legend(orient='right',
                                          title=legend_title)),
        tooltip=[
            alt.Tooltip('average(points):Q', format='.2f'),
            alt.Tooltip('average(price)', format='$.2f'),
            alt.Tooltip('average(value)', format='.2f'),
            alt.Tooltip('count(title)'),
        ],
    ).properties(
        title=chart_title
    ).configure_axis(
        labelFontSize=12, titleFontSize=12, grid=False, labelAngle=0
    ).properties(width=300, height=300)

    return heatmap.to_html()
def _heatmap(table: alt.Data,
             *,
             is_ticks: bool,
             name: str,
             x_name: str,
             range_color: str,
             height: int = None,
             width: int = None,
             selection: alt.Selection = None,
             x_scale: alt.Scale = alt.Undefined,
             y_scale: alt.Scale = alt.Undefined,
             brush: alt.Selection = alt.Undefined) -> alt.Chart:
    """Build a 50x50-bin count heatmap of the ``x`` / ``y`` fields.

    Args:
        table: Data providing quantitative ``x`` and ``y`` fields.
        is_ticks: When true, the heatmap carries no axis titles and is
            combined with tick strips along both axes, which carry the
            titles instead.
        name: Title of the y axis.
        x_name: Title of the x axis.
        range_color: Colour of the tick marks.
        height: Heatmap / y tick strip height; applied only with ticks.
        width: Heatmap / x tick strip width; applied only with ticks.
        selection: Optional selection added to the base chart.
        x_scale: Scale of the x channel.
        y_scale: Scale of the y channel.
        brush: Selection used as the bin extent on both axes.

    Returns:
        The heatmap alone, or the concatenation
        ``y_ticks | (heatmap & x_ticks)`` when ``is_ticks`` is true.
    """
    base = alt.Chart(table)
    if selection is not None:
        base = base.add_selection(selection)

    # With tick strips, the strips own the axis titles.
    if is_ticks:
        scat_x_title = ''
        scat_y_title = ''
    else:
        scat_x_title = x_name
        scat_y_title = name

    scat = base.mark_rect().encode(
        x=alt.X('x:Q',
                scale=x_scale,
                title=scat_x_title,
                bin=alt.Bin(maxbins=50, extent=brush)),
        y=alt.Y('y:Q',
                scale=y_scale,
                title=scat_y_title,
                bin=alt.Bin(maxbins=50, extent=brush)),
        color=alt.Color('count():Q', scale=alt.Scale(scheme='greenblue')))

    if not is_ticks:
        return scat

    tick_axis = alt.Axis(labels=False, domain=False, ticks=False)
    x_ticks = base.mark_tick().encode(
        x=alt.X('x:Q', axis=tick_axis, scale=x_scale, title=x_name),
        color=alt.value(range_color))
    # The y strip is built from a fresh chart, so it does not carry
    # ``selection``. Its channel uses alt.Y to match the ``y`` keyword.
    y_ticks = alt.Chart(table).mark_tick().encode(
        y=alt.Y('y:Q', axis=tick_axis, scale=y_scale, title=name),
        color=alt.value(range_color))

    scat = scat.properties(width=width, height=height)
    x_ticks = x_ticks.properties(width=width)
    y_ticks = y_ticks.properties(height=height)
    return y_ticks | (scat & x_ticks)
def plot_ratio(lengths_data, fullpath):
    """Write three HTML plots of the coding ratio read from a TSV file.

    Args:
        lengths_data: Path (or buffer) of a tab-separated file providing the
            columns ``codingRatio`` and ``geneNumber``.
        fullpath: Output path prefix. The files ``<fullpath>-ratioplot-full.html``,
            ``<fullpath>-ratioplot-zoom.html`` and
            ``<fullpath>-ratioplot-genome.html`` are written.

    Returns:
        None.
    """
    logging.info("Generating plots")
    lengths_data = pandas.read_csv(lengths_data, sep="\t")

    def ratio_axis():
        # Shared x encoding of both histograms: ratio in 0.1-wide bins,
        # shown on a fixed 0-2 domain.
        return altair.X('codingRatio:Q',
                        bin=altair.Bin(step=0.1),
                        scale=altair.Scale(domain=(0, 2)),
                        axis=altair.Axis(title='Query/Reference Ratio'))

    # Full histogram.
    hist = altair.Chart(lengths_data)\
        .mark_bar(clip=True)\
        .encode(x=ratio_axis(),
                y='count()',
                tooltip='count()')\
        .configure_mark(fill='red', stroke='black')

    # Same histogram with the count axis limited to 0-100.
    histzoom = altair.Chart(lengths_data)\
        .mark_bar(clip=True)\
        .encode(x=ratio_axis(),
                y=altair.Y('count()',
                           scale=altair.Scale(domain=(0, 100))),
                tooltip='count()')\
        .configure_mark(fill=' #c658dd ', stroke='black')

    # Ratio per gene along the genome. The x domain spans the number of
    # rows; it used to be len() of the literal column name, i.e. always 10,
    # which clipped everything past the tenth gene.
    gene_count = len(lengths_data['geneNumber'])
    genomeRatio = altair.Chart(lengths_data)\
        .mark_line(clip=True)\
        .encode(x=altair.X('geneNumber:Q',
                           scale=altair.Scale(domain=(0, gene_count))),
                y=altair.Y('codingRatio:Q',
                           scale=altair.Scale(type='log')),
                tooltip='codingRatio:Q')\
        .interactive()

    # save outputs
    logging.info("Saving image files to html")
    hist.save(fullpath + '-ratioplot-full' + '.html')
    histzoom.save(fullpath + '-ratioplot-zoom' + '.html')
    genomeRatio.save(fullpath + '-ratioplot-genome' + '.html')
    return None
def initializeChart(self):
    """Build the pre-binned bar chart and record equivalent Altair code.

    The measure attribute (looked up on ``self.view``) is placed on the
    channel named by ``measure.channel`` and "Count of Records" on the
    other one. Source text that recreates the chart is appended to
    ``self.code``.

    Returns:
        The Altair chart.

    NOTE(review): ``chart`` is only assigned for channels "x" and "y"; any
    other channel reaches ``return chart`` with the name unbound -- confirm
    that cannot happen.
    """
    self.tooltip = False
    measure = self.view.getAttrByDataModel("measure", excludeRecord=True)[0]
    msrAttr = self.view.getAttrByChannel(measure.channel)[0]
    xMin = self.view.xMinMax[msrAttr.attribute][0]
    xMax = self.view.xMinMax[msrAttr.attribute][1]

    # Bar size is the ratio of the observed data range to the plotted range,
    # scaled by 12.
    # NOTE(review): plotRange is 0 when xMin == xMax, which makes the
    # division below fail -- confirm upstream guarantees a non-empty range.
    xRange = abs(
        max(self.view.data[msrAttr.attribute]) - min(self.view.data[msrAttr.attribute]))
    plotRange = abs(xMax - xMin)
    markbar = xRange / plotRange * 12

    if (measure.channel == "x"):
        chart = alt.Chart(self.data).mark_bar(size=markbar).encode(
            alt.X(msrAttr.attribute,
                  title=f'{msrAttr.attribute} (binned)',
                  bin=alt.Bin(binned=True),
                  type=msrAttr.dataType,
                  axis=alt.Axis(labelOverlap=True),
                  scale=alt.Scale(domain=(xMin, xMax))),
            alt.Y("Count of Records", type="quantitative"))
    elif (measure.channel == "y"):
        # NOTE(review): unlike the "x" branch, no ``type`` is passed here,
        # while the emitted code string below does include one -- confirm
        # which is intended.
        chart = alt.Chart(self.data).mark_bar(size=markbar).encode(
            x=alt.X("Count of Records", type="quantitative"),
            y=alt.Y(msrAttr.attribute,
                    title=f'{msrAttr.attribute} (binned)',
                    bin=alt.Bin(binned=True),
                    axis=alt.Axis(labelOverlap=True),
                    scale=alt.Scale(domain=(xMin, xMax))))

    #####################################
    ## Constructing Altair Code String ##
    #####################################
    self.code += "import altair as alt\n"
    # self.code += f"viewData = pd.DataFrame({str(self.data.to_dict(orient='records'))})\n"
    self.code += f"viewData = pd.DataFrame({str(self.data.to_dict())})\n"
    # The text appended below is emitted verbatim; its whitespace is part of
    # the output.
    # NOTE(review): the emitted code uses ``markbar*3`` as bar size while the
    # chart built above uses ``markbar`` -- confirm the factor is deliberate.
    if (measure.channel == "x"):
        self.code += f''' chart = alt.Chart(viewData).mark_bar(size={markbar*3}).encode( alt.X('{msrAttr.attribute}', title='{msrAttr.attribute} (binned)',bin=alt.Bin(binned=True), type='{msrAttr.dataType}', axis=alt.Axis(labelOverlap=True), scale=alt.Scale(domain=({xMin}, {xMax}))), alt.Y("Count of Records", type="quantitative") ) '''
    elif (measure.channel == "y"):
        self.code += f''' chart = alt.Chart(viewData).mark_bar(size={markbar*3}).encode( alt.Y('{msrAttr.attribute}', title='{msrAttr.attribute} (binned)',bin=alt.Bin(binned=True), type='{msrAttr.dataType}', axis=alt.Axis(labelOverlap=True), 
scale=alt.Scale(domain=({xMin}, {xMax}))), alt.X("Count of Records", type="quantitative") ) '''
    return chart
def main(data_path, file_path):
    """Create the price overview plots and save them as PNG files.

    Args:
        data_path: Path of the CSV file to read; its first column is used
            as the index. The columns ``price``, ``neighbourhood_cleansed``
            and ``property_type`` are required.
        file_path: Directory that receives
            ``number_of_properties_by_price.png``, ``neighborhoods.png`` and
            ``price_by_property_type.png``.
    """
    data = pd.read_csv(data_path, index_col=0)

    # First plot: histogram of nightly prices in bins of 25.
    number_of_properties_by_price = alt.Chart(data).mark_bar(clip=True).encode(
        alt.X('price:Q',
              scale=alt.Scale(domain=(0, 700)),
              bin=alt.Bin(extent=[0, 700], step=25),
              title='Nightly price'),
        alt.Y('count()', title='No. of properties')
    ).properties(
        width=600,
        height=300,
        title='Number of properties by nightly price (between $0 and $700+)')
    number_of_properties_by_price.save(
        os.path.join(file_path, 'number_of_properties_by_price.png'))

    # Wrangling for the next two plots: attach a price label to every row.
    # The explicit copy makes the column assignment act on an independent
    # frame rather than on a slice of ``data``.
    price_data_labels = data[
        ['price', 'neighbourhood_cleansed', 'property_type']].copy()
    price_data_labels['label'] = pd.cut(
        price_data_labels['price'],
        bins=[0, 100, 300, 500, 13000],
        include_lowest=True,
        labels=['low', 'mid', 'high', 'exceptional'])

    # Second plot: price bins per neighbourhood, coloured by count.
    Neighborhoods = alt.Chart(price_data_labels).mark_rect().encode(
        alt.X('neighbourhood_cleansed:N', title="Neighborhoods"),
        alt.Y('price:Q',
              bin=alt.Bin(extent=[0, 700], step=50),
              title="Nightly price ($ CAD)"),
        alt.Color('count()')
    ).properties(title="Price of property by neighborhood")

    # First and second plot side by side.
    concat_1_2 = (number_of_properties_by_price | Neighborhoods
                  ).configure_title(fontSize=20
                  ).configure_axis(labelFontSize=13, titleFontSize=17
                  ).configure_legend(labelFontSize=13, titleFontSize=15)
    concat_1_2.save(os.path.join(file_path, 'neighborhoods.png'))

    # Third plot: price bins per property type, coloured by count.
    price_by_property_type = alt.Chart(price_data_labels).mark_rect().encode(
        alt.X('price:Q',
              bin=alt.Bin(extent=[0, 700], step=50),
              title="Nightly price ($ CAD)"),
        alt.Y('property_type:N', title="Property Type"),
        alt.Color('count()')
    ).configure_title(fontSize=20
    ).configure_axis(labelFontSize=13, titleFontSize=17
    ).configure_legend(labelFontSize=13, titleFontSize=15)
    price_by_property_type.save(
        os.path.join(file_path, 'price_by_property_type.png'))
def render(table: alt.Data, *, height: int, width: int, height_minimap: int):
    """Return a histogram of ``value`` stacked above a brushable minimap.

    An interval brush drawn on the minimap's x axis sets both the bin
    extent and the x scale domain of the detail histogram.
    """
    brush = alt.selection_interval(encodings=['x'])
    counts = alt.Chart(table).mark_bar().encode(y='count():Q')

    # Detail view: follows the brushed interval.
    detail_x = alt.X('value:Q',
                     bin=alt.Bin(maxbins=30, extent=brush),
                     scale=alt.Scale(domain=brush))
    detail = counts.encode(detail_x).properties(width=width, height=height)

    # Minimap: full range, hosts the brush.
    minimap_x = alt.X('value:Q', bin=alt.Bin(maxbins=30))
    minimap = counts.encode(minimap_x).add_selection(brush)
    minimap = minimap.properties(width=width, height=height_minimap)

    return detail & minimap
def histogram2d(x, y, x_name='x', y_name='y'):
    """Return a 2D histogram (binned rect heatmap) of ``x`` against ``y``.

    The frame is produced by ``build_histogram2d_dataframe``; the x channel
    uses at most 60 bins, the y channel at most 40, and the cell colour
    encodes the record count on the ``greenblue`` scheme.
    """
    frame = build_histogram2d_dataframe(x, y, x_name, y_name)

    x_channel = altair.X(x_name, bin=altair.Bin(maxbins=60))
    y_channel = altair.Y(y_name, bin=altair.Bin(maxbins=40))
    count_color = altair.Color('count()',
                               scale=altair.Scale(scheme='greenblue'))

    return altair.Chart(frame).mark_rect().encode(
        x_channel, y_channel, count_color)
def generate_wage_histogram(data_frame, output_folder, file_name):
    """
    Builds a histogram of the 'Wage' column and saves it through
    ``save_altair_chart``.

    Parameters:
    -----------
    data_frame : pandas.DataFrame
        data providing the 'Wage' column
    output_folder : str
        output folder path handed to ``save_altair_chart``
    file_name : str
        file name handed to ``save_altair_chart``

    Returns:
    -----------
    whatever ``save_altair_chart`` returns for the saved chart
    """
    wage_axis = alt.X("Wage", bin=alt.Bin(maxbins=40), title='Wage(in K)')
    player_axis = alt.Y('count()', title='Number of players')

    chart = alt.Chart(data_frame).mark_bar().encode(x=wage_axis, y=player_axis)
    chart = chart.properties(title='Distribution of Wage')

    return save_altair_chart(chart, output_folder, file_name)
def make_plot_4(year_range=[2013, 2016], animal="All"):
    """Return a histogram of intake ages for the chosen years and animal.

    Reads the module-level frame ``data_new4``. Rows are kept when their
    ``intake_year`` lies within ``year_range`` (both ends inclusive) and,
    unless ``animal`` is ``"All"``, when ``animal_type`` equals ``animal``.
    """
    # Common filter on the intake year.
    years = data_new4['intake_year']
    keep = (years >= year_range[0]) & (years <= year_range[1])

    if animal == "All":
        title_string = "Animal"
    else:
        # Additionally restrict to the requested animal type.
        keep = keep & (data_new4['animal_type'] == animal)
        title_string = animal + "s"

    df4 = data_new4[keep]

    histogram = alt.Chart(df4).mark_bar().encode(
        alt.X("age:Q", bin=alt.Bin(step=1), title="Age (years)"),
        alt.Y('count():Q', stack=None, title="Count"),
        tooltip=['count():Q', 'age:Q'],
    )
    histogram = histogram.properties(
        title=title_string + ' Intake Age Distribution',
        width=280,
        height=250,
    )
    chart = (
        histogram
        .configure_axisX(labelFontSize=12, titleFontSize=15, labelAngle=0)
        .configure_axisY(labelFontSize=12, titleFontSize=15)
        .configure_title(fontSize=18)
    )
    return chart
def plot_normalization_comparison():
    """Serve a histogram comparing normalized and raw interdependency ratings.

    Both result files are downloaded, tagged with a ``normal`` flag
    (1 = normalized, 0 = not normalized) and concatenated. A radio button
    bound to that flag selects which set is shown; the node with
    ``NodeID`` 64 is filtered out. The chart is displayed with ``serve()``.
    """
    columns = ['NodeID', 'id']
    normalized = pd.read_csv(
        'https://raw.githubusercontent.com/jamescoller/multilayer_design_network_tool/master/results/id_normalized.csv',
        names=columns)
    not_normal = pd.read_csv(
        'https://raw.githubusercontent.com/jamescoller/multilayer_design_network_tool/master/results/id_not_normalized.csv',
        names=columns)

    # Flag the origin of each row, then stack both sets.
    normalized['normal'] = 1
    not_normal['normal'] = 0
    norm2 = pd.concat([normalized, not_normal])

    # Radio buttons choosing between the two sets.
    radial_input = alt.binding_radio(options=[1, 0])
    norm_choice = alt.selection_single(fields=['normal'],
                                       bind=radial_input,
                                       name='Normalized?')

    histogram = alt.Chart(norm2).mark_bar().encode(
        x=alt.X('id:Q',
                bin=alt.Bin(maxbins=20),
                title='Interdependency Rating'),
        y=alt.Y('count()', title='Number of Nodes'),
    )
    norm_comparison = (
        histogram
        .add_selection(norm_choice)
        .transform_filter(norm_choice)
        .transform_filter(alt.datum.NodeID != 64)
    )
    norm_comparison.serve()
def plot_altair(hist, dist, dist_name, bin_size):
    """Overlay observed relative frequencies with a fitted distribution.

    Args:
        hist: Sequence of observed relative frequencies, one per bin.
        dist: Sequence of fitted probabilities, one per bin.
        dist_name: Name of the distribution, used in the chart title.
        bin_size: Width of one bin; the row position times ``bin_size``
            gives the x value.

    Returns:
        A layered chart: bars for ``hist`` and rules for ``dist``.
    """
    # One row per bin; the shorter sequence is padded with zeros.
    data = pd.DataFrame.from_dict(
        {'rf': hist, 'p': dist}, orient='index'
    ).transpose().fillna(0).reset_index()
    data['index'] = data['index'] * bin_size

    base = alt.Chart(
        data, title=f'{dist_name} Estimation of EKA Goals'
    ).encode(
        alt.X('index:Q',
              title='Goals Scored',
              bin=alt.Bin(step=bin_size),
              axis=alt.Axis(values=[
                  -5, 0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55
              ])))

    bar = base.mark_bar(opacity=.7).encode(alt.Y('rf:Q'))
    # The rule layer replaces the binned x encoding with the raw value.
    rule = base.mark_rule(size=2).encode(
        alt.X('index:Q'),
        alt.Y('p:Q', title='Relative Frequency',
              axis=alt.Axis(tickCount=5)))

    return alt.layer(bar, rule).properties(
        width=600, height=500
    ).configure_axis(titleFontSize=16).configure_title(fontSize=20)
def hist(self, bins=None, stacked=None, orientation="vertical", **kwargs):
    """Return a histogram chart with one colour per data column.

    ``bins`` may be an int (used as the maximum number of bins), ``None``
    (default binning) or any value accepted by the ``bin`` channel option.
    ``orientation`` selects which axis carries the binned values. With a
    truthy ``subplots`` keyword the columns are faceted, laid out according
    to the ``layout`` keyword.

    Raises ValueError for an unknown ``orientation``.
    """
    data = self._preprocess_data(with_index=False)

    if isinstance(bins, int):
        bins = alt.Bin(maxbins=bins)
    elif bins is None:
        bins = True

    if orientation not in ("vertical", "horizontal"):
        raise ValueError("orientation must be 'horizontal' or 'vertical'.")
    # Indep carries the binned values, Dep the frequencies.
    if orientation == "vertical":
        Indep, Dep = alt.X, alt.Y
    else:
        Indep, Dep = alt.Y, alt.X

    mark = self._get_mark_def({"type": "bar", "orient": orientation}, kwargs)

    # Fold all columns into long form so each one becomes a colour.
    folded = alt.Chart(data, mark=mark).transform_fold(
        list(data.columns), as_=["column", "value"])
    chart = folded.encode(
        Indep("value:Q", title=None, bin=bins),
        Dep("count()", title="Frequency", stack=stacked),
        color="column:N",
    )

    if kwargs.get("subplots"):
        _, ncols = _get_layout(data.shape[1], kwargs.get("layout", (-1, 1)))
        chart = chart.encode(facet=alt.Facet("column:N", title=None))
        chart = chart.properties(columns=ncols)

    return chart
def plot_number_of_eggs_by_age(age, count, xlim=(0, 50)):
    """Show the historical egg-count histogram for the user's age group.

    The age is mapped to an age group, the rows of the stored array whose
    first column equals that group are selected, and their second column is
    plotted as a histogram with unit-wide bins. A dashed red rule marks
    ``count``. The layered chart is rendered with ``st.altair_chart``;
    nothing is returned.
    """
    age_group = map_age_to_age_group(age, encode=False)

    records = np.load('static/data/HFEA_age_and_eggs_collected.npy',
                      allow_pickle=True)
    in_group = records[:, 0] == age_group
    df = pd.DataFrame({'count': records[in_group, 1]})

    hist_title = f'Historical Oocyte count for persons {age_group} years old.'
    eggs_axis = alt.X("count",
                      bin=alt.Bin(extent=[0, 50], step=1),
                      scale=alt.Scale(domain=xlim),
                      title='Number of eggs collected')
    cycles_axis = alt.Y('count()', title='Number of historical cycles')
    hist = alt.Chart(df, title=hist_title).mark_bar(opacity=0.75).encode(
        eggs_axis, cycles_axis)

    # add vertical line highlighting user inputted oocyte count
    marker = pd.DataFrame({'x': [count]})
    vline = alt.Chart(marker).mark_rule(
        color='red', strokeWidth=2, strokeDash=[3, 2], opacity=0.5
    ).encode(x='x:Q')

    st.altair_chart(hist + vline)
def exportHistogram(histogram, path):
    """Save ``histogram`` as a bar chart at ``path``.

    The position of each entry becomes ``X`` (binned into at most 100
    bins) and the entry itself becomes ``Y``.
    """
    positions = range(len(histogram))
    frame = pd.DataFrame({'X': positions, 'Y': histogram})

    x_channel = alt.X('X', bin=alt.Bin(maxbins=100))
    bars = alt.Chart(frame).mark_bar().encode(x_channel, y='Y')
    bars.save(path)
def save_ensemble_residual_graphs(save_to, models, X, y):
    """Save a residual scatter plot and a residual histogram as PNG files.

    The residual is ``y`` minus the result of
    ``average_ensemble_models(models, X)``. The files
    ``ensemble_residual_plot.png`` and
    ``ensemble_residual_distribution.png`` are written below ``save_to``,
    which must be a ``str``.
    """
    assert isinstance(save_to, str)

    ensemble_residual_df = pd.DataFrame({
        'true_price': y,
        'average_ensemble_residual': y - average_ensemble_models(models, X),
    })

    # Residual against the true price.
    scatter = alt.Chart(ensemble_residual_df).mark_circle(size=30, opacity=0.4)
    residual_chart = scatter.encode(
        x=alt.X('true_price', title='Price'),
        y=alt.Y('average_ensemble_residual',
                title='Average ensembling residual'),
    ).properties(
        width=850,
        height=500,
        title='Average Ensembling Residuals on Test Data',
    )

    # Distribution of the residuals in bins of width 5.
    bars = alt.Chart(ensemble_residual_df).mark_bar()
    residual_dist_chart = bars.encode(
        x=alt.X('average_ensemble_residual',
                title='Average ensembling residual',
                bin=alt.Bin(extent=[-1200, 2000], step=5)),
        y='count()',
    ).properties(
        width=850,
        height=500,
        title='Ensembling Residual Distribution',
    )

    with alt.data_transformers.enable('default'):
        residual_chart.save(save_to + '/ensemble_residual_plot.png')
        residual_dist_chart.save(
            save_to + '/ensemble_residual_distribution.png')
def initializeChart(self):
    """Return a bar chart with the non-count axis binned into at most 50 bins.

    The x and y column names come from ``self.dobj``. When the y column is
    ``"count()"`` the x channel is the binned quantitative one; otherwise
    the y channel is.
    """
    self.tooltip = False
    xAttr = self.dobj.getObjFromChannel("x")[0].columnName
    yAttr = self.dobj.getObjFromChannel("y")[0].columnName

    bars = alt.Chart(self.dataURL).mark_bar()
    binning = alt.Bin(maxbins=50)

    if yAttr == "count()":
        return bars.encode(
            alt.X(xAttr, type="quantitative", bin=binning),
            alt.Y(yAttr),
        )
    return bars.encode(
        alt.X(xAttr),
        alt.Y(yAttr, type="quantitative", bin=binning),
    )
def plot_altair_2(xcol):
    """Return the HTML of an interactive histogram of ``xcol``.

    Reads the module-level ``wine`` data; ``xcol`` is binned into at most
    30 bins and the bars are coloured by the ``Taste`` field.
    """
    binned_x = alt.X(xcol, type='quantitative', bin=alt.Bin(maxbins=30))
    bars = alt.Chart(wine).mark_bar().encode(
        x=binned_x,
        y=alt.Y('count()'),
        color='Taste',
    )
    return bars.interactive().to_html()
def histogram(values: np.array,
              x_label: str = "Score",
              y_label: str = "Number of Documents",
              x_scale: str = "linear",
              y_scale: str = "linear",
              max_bins: int = 100):
    """Builds a histogram chart of values.

    Args:
        values: A numpy array of quantitative values.
        x_label: A label for the x-axis; also used as the name of the data
            column holding ``values``.
        y_label: A label for the y-axis.
        x_scale: A continuous scale type, defined by `altair
            <https://altair-viz.github.io/user_guide/generated/core/altair.Scale.html>`_.
        y_scale: A continuous scale type, defined by `altair
            <https://altair-viz.github.io/user_guide/generated/core/altair.Scale.html>`_.
        max_bins: The maximum number of histogram bins.

    Returns:
        The altair bar chart.
    """
    binned_values = alt.X(
        f"{x_label}:Q",
        bin=alt.Bin(maxbins=max_bins),
        title=x_label,
        scale=alt.Scale(type=x_scale),
    )
    counts = alt.Y("count()", title=y_label, scale=alt.Scale(type=y_scale))

    frame = pd.DataFrame({x_label: values})
    return alt.Chart(frame).mark_bar().encode(x=binned_values, y=counts)
def criar_histograma(coluna, df, sliderH):
    """Return an interactive 600x600 histogram of column ``coluna`` of ``df``.

    ``sliderH`` is passed as the ``base`` of the binning, whose extent is
    fixed to 1-40000. The tooltip shows the column value and the count.
    """
    binning = alt.Bin(base=sliderH, extent=[1, 40000])
    bars = alt.Chart(df).mark_bar().encode(
        x=alt.X(coluna, bin=binning),
        y='count()',
        tooltip=[coluna, 'count()'],
    )
    return bars.properties(width=600, height=600).interactive()
def get_wpm_plot(df):
    """Return a histogram of words per minute, coloured by participant group.

    Works on a copy of ``df``. Two columns are added to the copy:
    ``likert_var`` (row-wise variance of the five Likert columns) and
    ``group`` (``'Amazon'`` for rows whose ``Start Date`` is before
    ``"2021-04-05"``, ``'XLab'`` otherwise).

    Args:
        df: DataFrame providing ``wpm``, ``Start Date`` and the columns
            ``Interest``, ``Effective``, ``Intelligence``, ``Writing`` and
            ``Meet``.

    Returns:
        The configured Altair chart.
    """
    df2 = df.copy()
    df2['likert_var'] = np.var(
        df2[['Interest', 'Effective', 'Intelligence', 'Writing', 'Meet']],
        axis=1)
    df2['group'] = 'XLab'
    df2.loc[(df2['Start Date'] < "2021-04-05"), 'group'] = 'Amazon'

    group_color = alt.Color(
        'group:N',
        scale=alt.Scale(range=[berkeley_palette['berkeley_blue'],
                               berkeley_palette['california_gold']]),
        legend=alt.Legend(title="Participant Group",
                          padding=10,
                          symbolType="square",
                          symbolStrokeWidth=1,
                          orient="right",
                          offset=-170))

    p = alt.Chart(df2).mark_bar(
        opacity=0.8,
        stroke=berkeley_palette['black'],
        strokeWidth=0.5
    ).encode(
        x=alt.X('wpm:Q', bin=alt.Bin(maxbins=100),
                title="Words per Minute (bin=100)"),
        y=alt.Y('count()', title='Frequency'),
        color=group_color
    ).properties(
        height=300,
        width=650,
        title={'text': 'Distribution of Response Time',
               'subtitle': 'Evaluated in Words per Minute'}
    ).configure(
        padding={'top': 20, 'left': 20, 'right': 20, 'bottom': 20}
    ).configure_facet(
        spacing=10
    ).configure_view(
        stroke=None
    ).configure_title(
        # Every configure_title call replaces the whole title config, so
        # both options must be set in a single call.
        anchor='middle', dy=-5
    ).configure_axis(
        grid=False
    )
    return p
def bar_chart(x1, y1, data):
    """Return a 500x300 bar chart of ``y1`` over ``x1`` (at most 100 bins).

    Both field names are converted to ``str`` before use. Bars are drawn
    in translucent red with a fixed size of 10.
    """
    x_field = str(x1)
    y_field = str(y1)

    bars = alt.Chart(data, width=500, height=300).mark_bar(
        color='red', size=10, opacity=0.3)
    return bars.encode(
        x=alt.X(x_field, bin=alt.Bin(maxbins=100)),
        y=y_field,
    )
def plot_predictions(self, altair_config: Dict[str, Any],
                     **kwargs) -> alt.Chart:
    """Return a histogram of ``self.y_pred`` highlighting a probability band.

    Args:
        altair_config: Settings mapping; ``"scheme"`` names the colour
            scheme and ``"title_config"`` holds keyword arguments for
            ``configure_title``.
        **kwargs: Must contain ``p_min`` and ``p_max``, the inclusive
            bounds of the band flagged in the ``focus`` column.

    Returns:
        A bar chart of the predictions in bins of width 0.01 on a 0-1
        domain, coloured by whether they fall inside the band.
    """
    p_min = kwargs["p_min"]
    p_max = kwargs["p_max"]

    df = pd.DataFrame(data={"target": 1, "prediction": self.y_pred})
    # Flag the predictions lying inside [p_min, p_max].
    df = df.assign(
        focus=lambda frame: frame["prediction"].between(p_min, p_max))

    color = alt.Color(
        "focus:N",
        legend=None,
        scale=alt.Scale(scheme=altair_config["scheme"]),
    )
    probability_axis = alt.X(
        "prediction:Q",
        bin=alt.Bin(step=0.01),
        scale=alt.Scale(domain=(0, 1)),
        title="Predicted Probability of class 1",
    )

    bars = alt.Chart(df).mark_bar().encode(
        probability_axis,
        y=alt.Y("count()", title="Number of Predictions"),
        color=color,
        tooltip=["target"],
    )
    sized = bars.properties(
        width="container",
        height=300,
        title="Distribution of Model Predictions",
    )
    return sized.configure_title(**altair_config["title_config"])