color = "#81F7D8" if "m4" in name: color = "#D20338" if "weight" in name: color = "#2E2EFE" if "random" in name: color = "#FFCE54" for value_label in value_labels: # 2.绘制JSON图像 print("# 2.绘制第%d幅%s折线图像..." % (i + 1, value_label)) alt.Chart(json_data).mark_line().encode( alt.X(scale=alt.Scale(domain=[start_time, end_time]), field=time_label, type="temporal", axis=alt.Axis(title="", labelFontSize=20)), alt.Y(scale=alt.Scale(domain=[min_value, max_value]), field=value_label, type="quantitative", axis=alt.Axis(title="", labelFontSize=20)), alt.Color(value=color)).properties( width=width, height=height).save(line_chart) os.system("mv *%s %s" % (line_postfix, value_label + "_" + line_dir)) print("# 2.绘制第%d幅%s面积图像..." % (i + 1, value_label)) alt.Chart(json_data).mark_area().encode( alt.X(scale=alt.Scale(domain=[start_time, end_time]), field=time_label, type="temporal",
# Altair plot 1: total confirmed cases per country (bars) overlaid with
# cases per million population (red points).
source = final_df

# Shared configuration for both layers: x-axis, tooltip, height and title.
# sort=None disables Altair's default sorting of the x values.
base = alt.Chart(source).encode(
    x=alt.X('Country/Region:N', sort=None),
    tooltip=['Country/Region', 'confirmed', 'cases/million'],
).properties(
    title='Total Confirmed Cases/Country',
    height=500,
)

# Bar layer: confirmed cases, axis title coloured like the bars.
bar = base.mark_bar(color='#5276A7').encode(
    y=alt.Y('confirmed:Q', axis=alt.Axis(titleColor='#5276A7')),
)

# Point layer: cases per million, axis title coloured like the points.
point = base.mark_circle(size=60, color='red').encode(
    y=alt.Y('cases/million:Q', axis=alt.Axis(titleColor='red')),
)

# Merge the layers; each one keeps its own y scale.
alt.layer(bar, point).resolve_scale(y='independent')

# Altair plot 2: global aggregates of confirmed, recovered and deaths
# (not used in the web app, kept only as an example).
source = timeseries_final
base = alt.Chart(source).encode(x='date:T')

line1 = base.mark_line(color='green').encode(y='total confirmed:Q')
line2 = base.mark_line(color='blue').encode(y='total recovered:Q')
line3 = base.mark_line(color='red').encode(y='total deaths:Q')
def result_heatmap(data, result="win", title=None, width=500, height=500):
    """
    Build an Altair heatmap of how often a hand result occurred for every
    combination of player hand total and dealer up card.

    Parameters
    ----------
    data : list or pandas.DataFrame
        A player's hand history. A list is converted with ``pd.DataFrame``.
        The columns "dealer_blackjack", "total" and "dealer_up" are read.
    result : str
        Which result to colour the heatmap by: "win", "loss", "push" or
        "surrender".
    title : str, optional
        Chart title. Defaults to "<Result> Percentage".
    width, height : int
        Chart size passed to ``alt.Chart``.

    Returns
    -------
    alt.Chart
        The heatmap chart.

    Raises
    ------
    AssertionError
        If ``result`` is not one of the supported results.
    """
    possible_results = ["win", "loss", "push", "surrender"]
    # Build the message from the list so it cannot drift from the values
    # that are actually accepted.
    assert result in possible_results, (
        "'result' must be one of: "
        + ", ".join(f"'{name}'" for name in possible_results)
    )

    if not title:
        title = f"{result.title()} Percentage"

    # convert data to a DataFrame if it's just a player's history list
    if isinstance(data, list):
        data = pd.DataFrame(data)

    # remove any hands where the dealer had blackjack or the player busted
    sub_data = data[
        (data["dealer_blackjack"] == 0) & (data["total"] <= 21)
    ].copy()

    # calculate the result percentages for each total / dealer up card combo
    grouped_pct = sub_data.groupby(
        ["total", "dealer_up"]
    ).apply(results_pct, as_series=False)

    # unpack the tuple returned per group into one column per result
    grouped_pct = grouped_pct.apply(pd.Series)
    grouped_pct.columns = possible_results

    # reset index and sort for plotting
    pct_data = grouped_pct.reset_index().sort_values("total", ascending=False)

    # dynamically determine how the legend should be labeled: one tick per
    # 0.1 step between the smallest and largest value over all results
    min_val = round(min(pct_data[possible_results].min()), 1)
    max_val = round(max(pct_data[possible_results].max()), 1)
    # round before int(): plain truncation would lose a step whenever the
    # float product lands just below the intended integer
    min_int = int(round(min_val * 10))
    max_int = int(round(max_val * 10))
    values = [round(x * 0.1, 1) for x in range(min_int, max_int + 1)]

    # create altair heatmap
    chart = alt.Chart(
        pct_data, title=title, width=width, height=height
    ).mark_rect(binSpacing=1).encode(
        x=alt.X(
            "dealer_up:O",
            axis=alt.Axis(orient="top", labelAngle=0),
            title="Dealer Up Card"
        ),
        y=alt.Y(
            "total:O",
            title="Player Total",
            sort=alt.EncodingSortField(op="mean", order="descending")
        ),
        color=alt.Color(
            f"{result}:Q",
            legend=alt.Legend(
                title=f"{result.title()} Probability",
                values=values
            )
        ),
        tooltip=[
            alt.Tooltip("dealer_up", title="Dealer Up Card"),
            alt.Tooltip("total", title="Player Total"),
            alt.Tooltip(f"{result}", title=f"{result.title()} Probability")
        ]
    )

    return chart
# Historical periods highlighted behind the population line.
source2 = [
    {"start": "1933", "end": "1945", "event": "Nazi Rule"},
    {"start": "1948", "end": "1989", "event": "GDR (East Germany)"},
]

# Both record lists become data frames (pandas is reached through altair).
source = alt.pd.DataFrame(source)
source2 = alt.pd.DataFrame(source2)

# Population over time as a dark line ...
line = alt.Chart(source).mark_line(color="#333").encode(
    alt.X("year:T", axis=alt.Axis(format="%Y"), title="Year"),
    alt.Y("population", title="Population"),
)

# ... with a point marker on every observation (same encodings as the line).
point = line.mark_point(color="#333")

# One coloured rectangle per historical period, spanning start..end on x.
rect = alt.Chart(source2).mark_rect().encode(
    alt.Color("event:N", title="Event"),
    x="start:T",
    x2="end:T",
)

# Rectangles first so the line and points are drawn on top of them.
(rect + line + point).properties(
    width=500,
    height=300,
    title="Population of Falkensee from 1875 to 2014",
)
def precipitation_dashboard():
    """Render the precipitation dashboard page.

    Reads the ``precipitation`` and ``station`` tables from the SQLite file
    at ``db_path_precipitation``, sums ``JAM_BARAN`` per station and water
    year, and builds a folium map with one marker per station. Each marker's
    popup holds a Vega-Lite bar chart of that station's totals per water
    year plus a red rule at their mean.

    Returns:
        The result of ``render_template`` for
        ``precipitation_flask/precipitation_dashboard.html`` with the map's
        HTML passed as ``map``.

    Raises:
        Exception: any error raised while loading the data is re-raised
            after the error message has been printed.
    """
    from contextlib import closing

    try:
        # closing() guarantees the connection is released even on failure.
        with closing(
            sqlite3.connect(db_path_precipitation, check_same_thread=False)
        ) as db_precipitation:
            db_precipitation_precip_data = pd.read_sql_query(
                sql="SELECT * FROM precipitation", con=db_precipitation)
            db_precipitation_stations = pd.read_sql_query(
                sql="SELECT * FROM station", con=db_precipitation)

        db_precipitation_precip_data["wateryear"] = [
            extract_wateryear(month, year)
            for month, year in zip(db_precipitation_precip_data["MONTH"],
                                   db_precipitation_precip_data["YEAR"])
        ]
        data = db_precipitation_precip_data
        data_water_year_baran = (
            data[['stationCode', 'wateryear', 'JAM_BARAN']]
            .groupby(['stationCode', 'wateryear'])
            .sum()
            .reset_index()
        )
    except Exception:
        # Previously the error was swallowed and the function crashed a few
        # lines later on an undefined variable; surface the real cause.
        print("ERROR LOAD DATA FROM DATABASE")
        raise

    station_map = folium.Map(
        location=[db_precipitation_stations["latDecimalDegrees"].mean(),
                  db_precipitation_stations["longDecimalDegrees"].mean()],
        tiles='Stamen Terrain',
        zoom_start=7
    )

    for station in db_precipitation_stations.itertuples(index=False):
        data_st = data_water_year_baran[
            data_water_year_baran["stationCode"] == station.stationCode]

        # create an altair chart, then convert to JSON
        bar = alt.Chart(data_st, width=600).mark_bar().encode(
            x=alt.X('wateryear:O', axis=alt.Axis(title='سال آبی')),
            y=alt.Y('JAM_BARAN:Q', axis=alt.Axis(title='بارندگی - میلیمتر'))
        )
        rule = alt.Chart(data_st).mark_rule(color='red').encode(
            y='mean(JAM_BARAN):Q'
        )

        # NOTE(review): the original also called
        # chart.configure_title(align="left") and discarded the returned
        # chart, so it never had any effect. Add align="left" to the
        # configure_title() call below if left alignment is wanted.
        chart = (bar + rule).properties(
            title=station.stationName
        ).configure_axisY(
            labelFontSize=16,
            labelFont="B Zar",
            titleFont="B Zar",
            titleFontSize=16
        ).configure_axisX(
            labelFontSize=16,
            labelFont="B Zar",
            titleFont="B Zar",
            titleFontSize=16
        ).configure_title(
            fontSize=20,
            font="B Titr",
        )

        vis = chart.to_json()

        folium.Marker(
            location=[station.latDecimalDegrees, station.longDecimalDegrees],
            popup=folium.Popup(max_width=700).add_child(
                folium.features.VegaLite(vis, width=700, height=300)),
            tooltip="کلیک کنید"
        ).add_to(station_map)

    return render_template(
        template_name_or_list='precipitation_flask/precipitation_dashboard.html',
        map=station_map._repr_html_()
    )
def confirmed():
    """Render the reported COVID-19 cases chart to ``graphs/confirmed.png``.

    Reads ``data/confirmed.csv``, keeps the rows whose ``source`` is
    ``"fhi:git"``, and draws daily new cases as bars, their 7-row rolling
    mean as a line, and the cumulative total as a second line on an
    independent y-axis. Any existing output file is replaced.
    """
    data = "data/confirmed.csv"
    filename = "graphs/confirmed.png"

    # Remove a stale image first; a missing file is not an error.
    try:
        os.remove(filename)
    except FileNotFoundError:
        pass

    df = pd.read_csv(data)
    df["date"] = pd.to_datetime(df["date"])

    # .copy() so the column added below is written to an independent frame
    # rather than a slice of the original (avoids SettingWithCopyWarning).
    df = df.loc[df["source"] == "fhi:git"].copy()

    # 7-row rolling mean of new cases, moved one row later by shift().
    df["new_sma7"] = df.new.rolling(window=7).mean().shift()

    # Long format: one row per (date, category); rows without a value
    # (e.g. the first rows of the rolling mean) are dropped.
    df = df.melt(
        id_vars=["date"],
        value_vars=["new", "new_sma7", "total"],
        var_name="category",
        value_name="value",
    ).dropna()

    rename = {"new": "New cases", "new_sma7": "Avg 7 d.", "total": "Cumulative"}
    df["category"] = df["category"].replace(rename)

    base = alt.Chart(
        df,
        title="Number of reported COVID-19 cases by specimen collection date (Source: FHI)",
    ).encode(alt.X("yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40)))

    bar = (
        base.transform_filter(alt.datum.category == "New cases")
        .mark_bar(color="#FFD1D1")
        .encode(y=alt.Y("value:Q", axis=alt.Axis(title="New per day", grid=True)))
    )

    # The cumulative line also carries the colour scale that defines the
    # legend entries for all three series.
    line = (
        base.transform_filter(alt.datum.category == "Cumulative")
        .mark_line(color="#2E507B", strokeWidth=3)
        .encode(
            y=alt.Y("value:Q", axis=alt.Axis(title="Cumulative")),
            color=alt.Color(
                "category:N",
                scale=alt.Scale(
                    domain=["New cases", "Avg 7 d.", "Cumulative"],
                    range=["#FFD1D1", "red", "#2E507B"],
                ),
                legend=alt.Legend(title=None),
            ),
        )
    )

    ma7 = (
        base.transform_filter(alt.datum.category == "Avg 7 d.")
        .mark_line(opacity=0.8)
        .encode(y=alt.Y("value:Q"), color=alt.Color("category:N"))
    )

    # Bars and rolling mean share one y-axis; the cumulative line gets its own.
    chart = (
        alt.layer(bar + ma7, line)
        .resolve_scale(y="independent")
        .properties(width=1200, height=600)
        .configure_legend(
            strokeColor="gray",
            fillColor="#FFFFFF",
            labelFontSize=12,
            symbolStrokeWidth=2,
            symbolSize=160,
            padding=6,
            cornerRadius=5,
            direction="horizontal",
            orient="none",
            legendX=480,
            legendY=655,
        )
    )

    chart.save(filename)
# Fixed seed so the generated sample is reproducible.
np.random.seed(42)

# Random data: 100 normally distributed values, truncated to integers and
# stored as strings so they can be split by character position.
df = pd.DataFrame(
    {'samples': np.random.normal(50, 15, 100).astype(int).astype(str)}
)

# Stem = everything except the last character, leaf = the last character.
df['stem'] = df['samples'].str[:-1]
df['leaf'] = df['samples'].str[-1]

df = df.sort_values(by=['stem', 'leaf'])

# Horizontal position of each leaf: 1-based running count within its stem.
df['position'] = df.groupby('stem').cumcount() + 1

# Stem-and-leaf plot: every leaf is drawn as text at (position, stem).
alt.Chart(df).mark_text(
    align='left',
    baseline='middle',
    dx=-5,
).encode(
    x=alt.X(
        'position:Q',
        axis=alt.Axis(title='', ticks=False, labels=False, grid=False),
    ),
    y=alt.Y('stem:N', axis=alt.Axis(title='', tickSize=0)),
    text='leaf:N',
).configure_axis(
    labelFontSize=20
).configure_text(
    fontSize=20
)
def show_visualization():
    """Render the mask-wearing vs. COVID-symptom Streamlit page.

    Loads ``fb_mask.csv`` and ``fb_sympton.csv`` through ``load_data``,
    joins them on date and state, and shows

    1. a scatter plot with a regression line for the states and date range
       picked in the sidebar, and
    2. two US choropleth maps (mask and symptom percentage) for the first
       selected date.
    """
    # Numeric state ids used by the us_10m topology, keyed by the upper-cased
    # state code. Looking ids up by state code (instead of assigning a
    # fixed-length list by row position) keeps the map correct when rows for
    # a date are missing or not in alphabetical order.
    state_ids = {
        'AK': 2, 'AL': 1, 'AR': 5, 'AZ': 4, 'CA': 6, 'CO': 8, 'CT': 9,
        'DC': 11, 'DE': 10, 'FL': 12, 'GA': 13, 'HI': 15, 'IA': 19,
        'ID': 16, 'IL': 17, 'IN': 18, 'KS': 20, 'KY': 21, 'LA': 22,
        'MA': 25, 'MD': 24, 'ME': 23, 'MI': 26, 'MN': 27, 'MO': 29,
        'MS': 28, 'MT': 30, 'NC': 37, 'ND': 38, 'NE': 31, 'NH': 33,
        'NJ': 34, 'NM': 35, 'NV': 32, 'NY': 36, 'OH': 39, 'OK': 40,
        'OR': 41, 'PA': 42, 'RI': 44, 'SC': 45, 'SD': 46, 'TN': 47,
        'TX': 48, 'UT': 49, 'VA': 51, 'VT': 50, 'WA': 53, 'WI': 55,
        'WV': 54, 'WY': 56,
    }

    # load in data; work on deep copies so the loaded objects stay untouched
    fb_mask_original = load_data("fb_mask.csv")
    fb_sympton_original = load_data("fb_sympton.csv")
    fb_sympton = copy.deepcopy(fb_sympton_original)
    fb_mask = copy.deepcopy(fb_mask_original)

    fb_mask['time_value'] = pd.to_datetime(fb_mask['time_value'],
                                           format='%Y/%m/%d')
    fb_sympton['time_value'] = pd.to_datetime(fb_sympton['time_value'],
                                              format='%Y/%m/%d')
    fb_mask.rename(columns={'value': 'mask_percentage'}, inplace=True)
    fb_sympton.rename(columns={'value': 'sympton_percentage'}, inplace=True)

    fb_all = fb_mask.merge(fb_sympton, on=['time_value', 'geo_value'])
    fb_all = fb_all[[
        'geo_value', 'time_value', 'mask_percentage', 'sympton_percentage'
    ]]
    fb_all = fb_all[fb_all['time_value'] > '2020-09-08']
    states = fb_all.geo_value.str.upper().unique()

    # first plot: correlation between wearing mask and having symptoms
    st.title(
        "Let`s see the correlation between wearing mask and having symptons.")
    state_choice = st.sidebar.multiselect(
        "Which state are you interested in?",
        states.tolist(),
        default=['AK', 'AL', 'AR', 'AZ', 'CA', 'CO'])

    # The available date span is needed for both the label and the default.
    first_date = min(fb_all['time_value'])
    last_date = max(fb_all['time_value'])
    date_range = st.sidebar.date_input(
        "Which range of date are you interested in? Choose between %s and %s" %
        (first_date.strftime('%Y/%m/%d'), last_date.strftime('%Y/%m/%d')),
        [first_date, last_date])

    fb_temp = fb_all[fb_all['geo_value'].str.upper().isin(state_choice)]

    # While only one date has been picked, treat it as an open-ended start.
    if len(date_range) == 2:
        fb_selected = fb_temp[
            (fb_temp['time_value'] >= pd.to_datetime(date_range[0]))
            & (fb_temp['time_value'] <= pd.to_datetime(date_range[1]))]
    else:
        fb_selected = fb_temp[
            fb_temp['time_value'] >= pd.to_datetime(date_range[0])]

    scatter_chart = alt.Chart(fb_selected).mark_circle().encode(
        x=alt.X('mask_percentage',
                scale=alt.Scale(zero=False),
                axis=alt.Axis(title='percentage of wearing masks')),
        y=alt.Y('sympton_percentage',
                scale=alt.Scale(zero=False),
                axis=alt.Axis(title='percentage of having covid symptons')))

    # Write the combined chart explicitly instead of leaving it as a bare
    # expression whose display depends on Streamlit's "magic" feature.
    st.write(scatter_chart + scatter_chart.transform_regression(
        'mask_percentage', 'sympton_percentage').mark_line())

    # second plot: maps for the first selected date
    map_data = fb_all[fb_all['time_value'] == pd.to_datetime(
        date_range[0])].copy()
    map_data['id'] = map_data['geo_value'].str.upper().map(state_ids)
    # Rows whose state code is unknown cannot be placed on the map.
    map_data = map_data.dropna(subset=['id'])
    map_data['id'] = map_data['id'].astype(int)

    state_shapes = alt.topo_feature(data.us_10m.url, 'states')
    variable_list = ['mask_percentage', 'sympton_percentage']
    chart = alt.Chart(state_shapes).mark_geoshape().encode(
        alt.Color(alt.repeat('row'), type='quantitative')
    ).transform_lookup(
        lookup='id',
        from_=alt.LookupData(map_data, 'id', variable_list)
    ).properties(
        width=500, height=300
    ).project(
        type='albersUsa'
    ).repeat(
        row=variable_list
    ).resolve_scale(color='independent')
    st.write(chart)
}]) open_close_color = alt.condition("datum.open < datum.close", alt.value("#06982d"), alt.value("#ae1325")) rule = alt.Chart(source).mark_rule().encode(alt.X( 'yearmonthdate(date):T', scale=alt.Scale(domain=[{ "month": 5, "date": 31, "year": 2009 }, { "month": 7, "date": 1, "year": 2009 }]), axis=alt.Axis(format='%m/%d', title='Date in 2009')), alt.Y( 'low', title='Price', scale=alt.Scale(zero=False), ), alt.Y2('high'), color=open_close_color) bar = alt.Chart(source).mark_bar().encode(x='yearmonthdate(date):T', y='open', y2='close', color=open_close_color) rule + bar
def main():
    """Streamlit entry point for the 2017 doctorate statistics pages.

    Loads the three data sets, lets the user pick a page in the sidebar and
    renders the matching interactive bar chart(s).
    """
    df1 = load_data_1()
    df2 = load_data_2()
    df3 = load_data_3()

    def _axis(**extra):
        # Axis styling shared by every chart on both pages.
        return alt.Axis(labelFontSize=15, titleFontSize=20, **extra)

    def _show(chart):
        # All charts are interactive and stretched to the container width.
        st.altair_chart(chart.interactive(), use_container_width=True)

    def _table_4():
        st.header(
            "Top 20 doctorate-granting institutions ranked by number of doctorate recipients, by broad field of study: 2017"
        )
        mode = st.selectbox("Total or By Field", ['Total', 'By Field'])

        if mode == 'Total':
            x = 'field'
            y = 'total'
            _show(alt.Chart(df1).mark_bar().encode(
                x=alt.X(x, sort=None, axis=_axis()),
                y=alt.Y(y, sort=None, axis=_axis()),
                color=x,
                tooltip=["field", "total"]))
            return

        if mode == 'By Field':
            field = st.selectbox("Choose a field",
                                 list(np.unique(df2['field'])))
            x = "Field and institution"
            y = "Doctorate recipients"
            tmp = df2.loc[df2['field'] == field]
            _show(alt.Chart(tmp).mark_bar().encode(
                x=alt.X(x, sort=None, axis=_axis()),
                y=alt.Y(y, sort=None, axis=_axis(title="Number")),
                tooltip=[x, y]))

    def _table_6():
        st.header(
            "Doctorates awarded, by state or location, broad field of study, and sex of doctorate recipients: 2017"
        )
        field = st.selectbox("Choose a field", list(np.unique(df3['field'])))
        tmp = df3.loc[df3['field'] == field]
        state = st.selectbox("Choose a state/location",
                             list(np.unique(tmp['State or location'])))
        selected = tmp.loc[tmp["State or location"] == state]
        _show(alt.Chart(selected).mark_bar().encode(
            x=alt.X("sex", sort=None, axis=_axis()),
            y=alt.Y("value", sort=None, axis=_axis()),
            tooltip=["field", "sex", "value"]))
        st.write(
            "Note: For State Not Available in the options, it's either 0 or confidential."
        )

    page = st.sidebar.selectbox("Choose a page", ["Table 4", "Table 6"])
    if page == "Table 4":
        _table_4()
    elif page == "Table 6":
        _table_6()
def main(opt):
    """Create the exploratory charts for the wine-quality training set.

    Reads the CSV named by ``opt["--preprocessed_train"]`` and saves

    * a count chart of the ``quality`` column to
      ``opt["--quality_count_path"]``, and
    * a grid of mean-with-error-bar charts of the explanatory variables per
      quality class to ``opt["--quality_all_variables_path"]``.

    Exits with status 1 when the input CSV does not exist.
    """
    train_csv = opt["--preprocessed_train"]

    # Read in data
    try:
        wine_train = pd.read_csv(train_csv)
    except FileNotFoundError:
        print("Input csv file of train set does not exist.")
        sys.exit(1)

    # Create visualizations
    wine_train["quality"] = wine_train["quality"].astype("category")
    alt.data_transformers.disable_max_rows()

    # --- Distribution of the outcome variable ---
    quality_count_path = opt["--quality_count_path"]

    # NOTE(review): the shorthand says ordinal (":O") while type= says
    # quantitative; kept exactly as it was - confirm which one is intended.
    quality_axis = alt.X(
        "quality:O",
        type="quantitative",
        title="Quality",
        axis=alt.Axis(format=".0f"),
    )
    grade_color = alt.Color(
        "quality", title="Wine Grade", scale=alt.Scale(scheme="viridis")
    )
    count_chart = (
        alt.Chart(wine_train)
        .mark_bar(size=40)
        .encode(x=quality_axis, y=alt.Y("count()"), color=grade_color)
        .properties(
            width=400, title="Histogram: Number of Wines by Quality Class"
        )
    )
    count_chart.save(quality_count_path)

    # --- Repeated charts for all 11 explanatory variables ---
    quality_all_variables_path = opt["--quality_all_variables_path"]

    # Human-readable column names: "fixed_acidity" -> "Fixed Acidity".
    wine_train_for_plotting = wine_train.copy()
    wine_train_for_plotting.columns = [
        " ".join(word.capitalize() for word in name.split("_"))
        for name in list(wine_train.columns)
    ]

    pych_variables = [
        "Fixed Acidity",
        "Volatile Acidity",
        "Citric Acid",
        "Residual Sugar",
        "Chlorides",
        "Free Sulfur Dioxide",
        "Total Sulfur Dioxide",
        "Density",
        "Ph",
        "Sulphates",
        "Alcohol",
    ]

    # Mean of the repeated variable per quality class ...
    bar = (
        alt.Chart(wine_train_for_plotting)
        .mark_bar()
        .encode(
            x=alt.X("Quality", title="Quality"),
            y=alt.Y(
                alt.repeat("row"),
                type="quantitative",
                aggregate="mean",
                scale=alt.Scale(zero=False),
            ),
            color=alt.Color(
                "Quality", title="Wine Grade", scale=alt.Scale(scheme="viridis")
            ),
        )
        .properties(width=400, height=300)
    )

    # ... with an error bar of the same variable on top.
    error = (
        alt.Chart(wine_train_for_plotting)
        .mark_errorbar()
        .encode(
            x=alt.X("Quality"),
            y=alt.Y(
                alt.repeat("row"),
                type="quantitative",
                scale=alt.Scale(zero=False),
            ),
        )
    )

    def _column_of(variables):
        # One vertical stack of bar + error-bar charts, one row per variable.
        return (bar + error).repeat(row=variables)

    # Three columns side by side: variables 1-4, 5-8 and 9-11.
    quality_all_variables_left = _column_of(pych_variables[:4])
    quality_all_variables_middle = _column_of(pych_variables[4:8])
    quality_all_variables_right = _column_of(pych_variables[8:])

    quality_all_variables = (
        quality_all_variables_left
        | quality_all_variables_middle
        | quality_all_variables_right
    )
    quality_all_variables.save(quality_all_variables_path)
"""
Becker's Barley Trellis Plot
----------------------------
The example demonstrates the trellis charts created by Richard Becker, William Cleveland and others in the 1990s. Using the visualization technique below they identified an anomaly in a widely used agricultural dataset, which they termed ["The Morris Mistake."](https://www.albany.edu/acc/courses/acc522fall2007/lecturenotes/trellis.usermanual.pdf). It became their favored way of showcasing the power of this pioneering plot.
"""
# category: case studies
import altair as alt
from vega_datasets import data

source = data.barley()

# One panel (row) per site, one line of points per variety, coloured by
# year. Varieties use a descending sum-of-yield sort; sites use an equally
# configured but separate sort object.
alt.Chart(source, title="The Morris Mistake").mark_point().encode(
    x=alt.X(
        'yield:Q',
        scale=alt.Scale(zero=False),
        axis=alt.Axis(grid=False, title="Barley Yield (bushels/acre)"),
    ),
    y=alt.Y(
        'variety:N',
        sort=alt.EncodingSortField(field='yield', op='sum', order='descending'),
        scale=alt.Scale(rangeStep=20),
        axis=alt.Axis(title="", grid=True),
    ),
    color=alt.Color('year:N', legend=alt.Legend(title="Year")),
    row=alt.Row(
        'site:N',
        title="",
        sort=alt.EncodingSortField(field='yield', op='sum', order='descending'),
    ),
).configure_view(
    # No border around the individual panels.
    stroke="transparent"
)
# Weekly measles incidence per US state; '-' marks missing values and the
# first two lines of the file are skipped.
url = 'https://raw.githubusercontent.com/blmoore/blogR/master/data/measles_incidence.csv'
data = pd.read_csv(url, skiprows=2, na_values='-')
data.head()

# Collapse the weekly rows into one total per year.
annual = data.drop('WEEK', axis=1).groupby('YEAR').sum()
annual.head()

# Long format: one row per (YEAR, state) with its incidence.
measles = annual.reset_index()
measles = pd.melt(measles, 'YEAR', var_name='state', value_name='incidence')
measles.head()

# First pass: heatmap with the default colour scale.
alt.Chart(measles).mark_rect().encode(
    x='YEAR:O',
    y='state:N',
    color='incidence',
).properties(width=600, height=400)

# Define a custom colormap using hex codes & HTML color names
colormap = alt.Scale(
    domain=[0, 100, 200, 300, 1000, 3000],
    range=[
        '#F0F8FF', 'cornflowerblue', 'mediumseagreen', '#FFEE00',
        'darkorange', 'firebrick'
    ],
    type='sqrt',
)

# Second pass: same heatmap with the custom scale, no axis titles or ticks
# and no legend.
alt.Chart(measles).mark_rect().encode(
    x=alt.X('YEAR:O', axis=alt.Axis(title=None, ticks=False)),
    y=alt.Y('state:N', axis=alt.Axis(title=None, ticks=False)),
    color=alt.Color('incidence:Q',
                    sort='ascending',
                    scale=colormap,
                    legend=None),
).properties(width=800, height=500)

#%%
def _make_manual_legend(self, df, click_selection):
    """Build a hand-made, clickable legend as a layered Altair chart.

    One diamond marker plus label is drawn per group of ``self.colorby`` in
    ``df``, with a title row above them. ``click_selection`` is attached to
    an invisible text layer and a hover selection named ``legend_hover`` is
    attached to an invisible point layer.

    Raises:
        ValueError: if ``df`` has more groups than ``self.MAX_LEGEND_MARKS``.
    """
    max_marks = self.MAX_LEGEND_MARKS

    first_rows = df.groupby(self.colorby).first().reset_index()
    groups = first_rows.sort_values(self.colorby, ascending=True)
    group_names = list(groups[self.colorby].values)
    if len(group_names) > max_marks:
        raise ValueError(
            f'max {self.MAX_LEGEND_MARKS} supported for now ({len(group_names)} requested)'
        )

    # One "normal" row per group, counting down from max_marks + 1, followed
    # by a single "title" row placed above them.
    idx = list(max_marks + 1 - np.arange(len(group_names)))
    row_type = ['normal'] * len(idx)
    idx.append(max_marks + 2)
    row_type.append('title')
    group_names.append(f'Select {self.get("readable_group_name", "line")}')

    leg_df = pd.DataFrame({
        'idx': idx,
        'group_idx': list(groups['group_idx']) + [-1],
        self._colorby: group_names,
        'x': list(np.zeros_like(idx)),
        'row_type': row_type,
    })

    # Axis with every visible element switched off.
    axis = alt.Axis(domain=False,
                    ticks=False,
                    orient='right',
                    grid=False,
                    labels=False)
    base = alt.Chart(leg_df, height=self._height, width=100)

    def _make_base(base, **extra_kwargs):
        # Position / colour / detail encodings shared by all legend layers.
        x_channel = alt.X('x:Q',
                          title='',
                          axis=axis,
                          scale=alt.Scale(domain=(-5, 20)))
        y_channel = alt.Y('idx:Q',
                          title='',
                          axis=axis,
                          scale=alt.Scale(domain=(0, self.MAX_LEGEND_MARKS)))
        return base.encode(x=x_channel,
                           y=y_channel,
                           color=self._alt_color,
                           detail=self._alt_detail,
                           **extra_kwargs)

    # Diamond markers for the "normal" rows, dimmed when not in focus.
    point_opacity = alt.condition(
        self._click_focused_or_none_selected(),
        alt.value(1),
        alt.value(0.4),
    )
    legend_points = _make_base(base, opacity=point_opacity).mark_point(
        shape='diamond', filled=True, size=160)
    legend_points = legend_points.transform_filter(
        'datum.row_type == "normal"')

    cursor = alt.selection_single(name='legend_hover',
                                  nearest=True,
                                  on='mouseover',
                                  clear='mouseout',
                                  fields=['group_idx'],
                                  empty='none')

    # Fake, fully transparent text layer that carries the click selection.
    click_layer = legend_points.mark_text(
        align='left',
    ).encode(
        text='padded_text:N',
        opacity=alt.value(0),
    ).transform_calculate(
        padded_text=f'"__" + datum.{self._colorby} + "__"'
    ).add_selection(click_selection)

    # Zero-size points that carry the hover selection.
    hover_layer = _make_base(base).mark_point(size=0).add_selection(cursor)

    # Group labels next to the markers.
    label_layer = legend_points.mark_text(
        align='left',
        dx=10,
        font=self._font,
    ).encode(
        text=f'{self._colorby}:N',
        color=alt.value('black'),
        opacity=alt.condition(
            self._in_focus_or_none_selected(),
            alt.value(1),
            alt.value(0.4),
        ),
    )

    # Legend title row.
    title_layer = _make_base(base).mark_text(
        align='left',
        dx=-10,
        dy=-5,
        font=self._font,
        fontSize=16,
    ).encode(
        text=f'{self._colorby}:N',
        color=alt.value('black'),
    ).transform_filter('datum.row_type == "title"')

    layers = [
        legend_points,
        click_layer,
        hover_layer,
        label_layer,
        title_layer,
    ]
    return alt.layer(*layers, view=alt.ViewConfig(strokeOpacity=0))
def vaccine_doses():
    """Render the vaccine dose chart to ``graphs/vaccine_doses.png``.

    Reads ``data/vaccine_doses.csv``, keeps the rows whose
    ``granularity_geo`` is ``"nation"`` and draws the totals of first,
    second and third doses as overlapping (unstacked) areas. Any existing
    output file is replaced.
    """
    data = "data/vaccine_doses.csv"
    filename = "graphs/vaccine_doses.png"

    # Remove a stale image first; a missing file is not an error.
    try:
        os.remove(filename)
    except FileNotFoundError:
        pass

    df = pd.read_csv(data)
    df["date"] = pd.to_datetime(df["date"])
    df = df[df["granularity_geo"] == "nation"]

    # A rolling mean of new_doses used to be computed here, but the column
    # was never passed to melt() below, so it has been dropped as dead code.

    # Long format: one row per (date, dose category).
    df = df.melt(
        id_vars=["date"],
        value_vars=["total_dose_1", "total_dose_2", "total_dose_3"],
        var_name="category",
        value_name="value",
    ).dropna()

    rename = {
        "total_dose_1": "Dose 1",
        "total_dose_2": "Dose 2",
        "total_dose_3": "Dose 3",
    }
    df["category"] = df["category"].replace(rename)

    chart = (
        alt.Chart(
            df,
            title="Number of people who received their first, second and third dose of a COVID-19 vaccine in Norway (Source: FHI)",
        )
        .mark_area(line={}, opacity=0.3)
        .encode(
            x=alt.X("yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40)),
            y=alt.Y(
                "value:Q",
                # stack=None: the areas overlap instead of being summed.
                stack=None,
                title="Number of people",
            ),
            color=alt.Color(
                "category:N",
                scale=alt.Scale(
                    domain=[
                        "Dose 1",
                        "Dose 2",
                        "Dose 3",
                    ],
                    # The second colour used to be " #2ecc71" with a stray
                    # leading space.
                    range=["#5dade2", "#2ecc71", "#006600"],
                ),
                legend=alt.Legend(title=None),
            ),
        )
        .properties(width=1200, height=600)
        .configure_legend(
            strokeColor="gray",
            fillColor="#FFFFFF",
            labelFontSize=12,
            symbolStrokeWidth=2,
            symbolSize=160,
            padding=6,
            cornerRadius=5,
            direction="horizontal",
            orient="none",
            legendX=380,
            legendY=660,
        )
    )

    chart.save(filename)
def plot_interactive(
    self,
    x_axis: Union[str, Embedding],
    y_axis: Union[str, Embedding],
    annot: bool = True,
    show_axis_point: bool = False,
    color: Union[None, str] = None,
):
    """
    Makes highly interactive plot of the set of embeddings.

    Arguments:
        x_axis: the x-axis to be used, must be given when dim > 2
        y_axis: the y-axis to be used, must be given when dim > 2
        annot: drawn points should be annotated
        show_axis_point: ensure that the axis are drawn
        color: a property that will be used for plotting

    Returns:
        The Altair chart; when `annot` is true the text labels are layered
        on top of the points.

    **Usage**

    ```python
    from whatlies.language import SpacyLanguage

    words = ["prince", "princess", "nurse", "doctor", "banker", "man", "woman",
             "cousin", "neice", "king", "queen", "dude", "guy", "gal", "fire",
             "dog", "cat", "mouse", "red", "bluee", "green", "yellow", "water",
             "person", "family", "brother", "sister"]

    lang = SpacyLanguage("en_core_web_md")
    emb = lang[words]

    emb.plot_interactive('man', 'woman')
    ```
    """
    # Axes given by name are looked up in this set.
    if isinstance(x_axis, str):
        x_axis = self[x_axis]
    if isinstance(y_axis, str):
        y_axis = self[y_axis]

    plot_df = pd.DataFrame({
        "x_axis": self.compare_against(x_axis),
        "y_axis": self.compare_against(y_axis),
        "name": [v.name for v in self.embeddings.values()],
        "original": [v.orig for v in self.embeddings.values()],
    })

    if color:
        # Embeddings without the requested attribute get an empty value.
        plot_df[color] = [
            getattr(v, color, '') for v in self.embeddings.values()
        ]

    if not show_axis_point:
        # Hide the embeddings that define the axes themselves.
        plot_df = plot_df.loc[
            lambda d: ~d["name"].isin([x_axis.name, y_axis.name])]

    # The y channel is built with alt.Y (it was alt.X before, which only
    # happened to work).
    result = (alt.Chart(plot_df).mark_circle(size=60).encode(
        x=alt.X("x_axis", axis=alt.Axis(title=x_axis.name)),
        y=alt.Y("y_axis", axis=alt.Axis(title=y_axis.name)),
        tooltip=["name", "original"],
        color=alt.Color(":N", legend=None) if not color else alt.Color(color),
    ).properties(title=f"{x_axis.name} vs. {y_axis.name}").interactive())

    if annot:
        text = (alt.Chart(plot_df).mark_text(
            dx=-15, dy=3, color="black").encode(
                x=alt.X("x_axis", axis=alt.Axis(title=x_axis.name)),
                y=alt.Y("y_axis", axis=alt.Axis(title=y_axis.name)),
                text="original",
            ))
        result = result + text

    return result
def tested_lab():
    """Render the lab-test chart to ``graphs/tested_lab.png``.

    Reads ``data/tested_lab.csv`` and draws the daily negative and positive
    test counts as bars with the share of positive tests as a red line on an
    independent y-axis. Any existing output file is replaced.
    """
    data = "data/tested_lab.csv"
    filename = "graphs/tested_lab.png"

    if os.path.exists(filename):
        os.remove(filename)

    # Raw column names -> labels used in the chart.
    mapping = {
        "new_neg": "New (Negative)",
        "new_pos": "New (Positive)",
        "new_total": "New",
        "pr100_pos": "Share Positive",
        "total": "Cumulative",
    }
    df = pd.read_csv(data).rename(columns=mapping)
    df["date"] = pd.to_datetime(df["date"])
    df["Share Negative"] = 100 - df["Share Positive"]

    # Long format; "Share Positive" stays a regular column so the line layer
    # can read it directly.
    df = df.melt(
        id_vars=["date", "Share Positive"],
        var_name="category",
        value_name="value",
    )

    date_axis = alt.X(
        "yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40)
    )
    base = alt.Chart(
        df,
        title="Number of tested persons per specimen collection date and number of positive results (Source: FHI)",
    ).encode(date_axis)

    # Share of positive tests as a line.
    andel = base.mark_line(color="red", opacity=0.8).encode(
        y=alt.Y("Share Positive:Q", title="% Positive", axis=alt.Axis(grid=True))
    )

    # Bars for the negative / positive counts only. The colour scale also
    # lists "% Positive" so the legend has an entry for the line.
    is_count_row = (alt.datum.category == "New (Negative)") | (
        alt.datum.category == "New (Positive)"
    )
    category_color = alt.Color(
        "category:N",
        scale=alt.Scale(
            domain=["New (Positive)", "New (Negative)", "% Positive"],
            range=["#FF9622", "#6DA9FF", "red"],
        ),
        legend=alt.Legend(title=None),
    )
    bar = (
        base.transform_filter(is_count_row)
        .mark_bar()
        .encode(
            y=alt.Y("value:Q", title="Number of persons"),
            color=category_color,
        )
    )

    layered = alt.layer(bar, andel).resolve_scale(y="independent")
    chart = layered.properties(width=1200, height=600).configure_legend(
        strokeColor="gray",
        fillColor="#FFFFFF",
        labelFontSize=12,
        symbolStrokeWidth=2,
        symbolSize=160,
        padding=6,
        cornerRadius=5,
        direction="horizontal",
        orient="none",
        legendX=480,
        legendY=655,
    )

    chart.save(filename)
# Helper columns for plotting; max_price is taken over the unfiltered data.
chart_data["today"] = today
chart_data["max_price"] = chart_data["price"].max()

# Keep only rows with start_date < date <= end_date.
date_mask = ((chart_data['date'].dt.date > start_date)
             & (chart_data['date'].dt.date <= end_date))
chart_data = chart_data[date_mask]

# Separate frame (today onwards) used to highlight the forecasted area.
pred_data = chart_data[chart_data["date"].dt.date >= today]

# Price line per stock.
base = alt.Chart(chart_data, height=500, width=700)
line = base.mark_line().encode(
    alt.Y('price:Q', axis=alt.Axis(title='price')),
    x='date:T',
    color='stock:N',
)

''' The forecast made by model is under the lightblue area. '''

# Light-blue band reaching up to max_price over the forecast period.
band = alt.Chart(pred_data, height=500, width=700).mark_area(
    opacity=0.5,
    color='lightblue',
).encode(
    alt.Y('max_price', axis=alt.Axis(title='')),
    x='date',
)

st.altair_chart(line + band)

'''The data is gathered from Yahoo Finance API.'''
"""
Line Chart with Percent axis
----------------------------
This example shows how to set an axis as a percent.
"""
import altair as alt
from altair.expr import datum
from vega_datasets import data

source = data.jobs.url

# One line per sex for the job "Welder"; the y-axis labels use the '%'
# number format.
chart = alt.Chart(source).mark_line().encode(
    x=alt.X('year:O'),
    y=alt.Y('perc:Q', axis=alt.Axis(format='%')),
    color='sex:N',
).properties(
    title='Percent of work-force working as Welders'
).transform_filter(
    datum.job == 'Welder'
)
"x": 14, "y": 66 }, { "x": 15, "y": 17 }, { "x": 16, "y": 27 }, { "x": 17, "y": 68 }, { "x": 18, "y": 16 }, { "x": 19, "y": 49 }, { "x": 20, "y": 15 }]) area1 = alt.Chart(df).mark_area(clip=True, interpolate='monotone').encode( alt.X('x', scale=alt.Scale(zero=False, nice=False)), alt.Y('y', scale=alt.Scale(domain=[0, 50]), axis=alt.Axis(title='y')), opacity=alt.value(0.6)).properties(width=500, height=75) area2 = area1.encode(y='ny:Q').transform_calculate("ny", datum.y - 50) area1 + area2
print('\n')

# Horizontal count charts for the columns id_12 .. id_38, keyed by column.
charts = {}
info = ['id_' + str(n) for n in range(12, 39)]

for i in info:
    # Three of the columns get a wider chart.
    width_len = 600 if i in ['id_30', 'id_31', 'id_33'] else 400

    # Value counts (NaN included) as a frame with columns [i, 'count'].
    feature_count = (
        train[i]
        .value_counts(dropna=False)
        .reset_index()
        .rename(columns={i: 'count', 'index': i})
    )
    chart = alt.Chart(feature_count).mark_bar().encode(
        x=alt.X('count:Q', axis=alt.Axis(title='Count')),
        y=alt.Y(f"{i}:N", axis=alt.Axis(title=i)),
        tooltip=[i, 'count'],
    ).properties(title=f"Counts of {i}", width=width_len)
    charts[i] = chart

# Vertical charts of the 40 most frequent values for the three wide columns.
# NOTE(review): these charts are not stored in `charts`; after the loop only
# the last one is still reachable through `chart` - confirm this is intended.
for i in ['id_30', 'id_31', 'id_33']:
    feature_count = (
        train[i]
        .value_counts(dropna=False)[:40]
        .reset_index()
        .rename(columns={i: 'count', 'index': i})
    )
    chart = alt.Chart(feature_count).mark_bar().encode(
        x=alt.X(f"{i}:N", axis=alt.Axis(title=i)),
        y=alt.Y('count:Q', axis=alt.Axis(title='Count')),
        tooltip=[i, 'count'],
    ).properties(title=f"Counts of {i}", width=800)
"""
Calculating Percentage of Total
-------------------------------
This chart demonstrates how to use a window transform to display data values
as a percentage of total values.
"""
# category: bar charts
import altair as alt
import pandas as pd

activities = pd.DataFrame({
    'Activity': ['Sleeping', 'Eating', 'TV', 'Work', 'Exercise'],
    'Time': [8, 2, 4, 8, 2],
})

alt.Chart(activities).mark_bar().transform_window(
    # An unbounded frame makes the windowed sum the total over all rows.
    TotalTime='sum(Time)',
    frame=[None, None],
).transform_calculate(
    # Each row's share of that total.
    PercentOfTotal="datum.Time / datum.TotalTime"
).encode(
    x=alt.X('PercentOfTotal:Q', axis=alt.Axis(format='.0%')),
    y='Activity:N',
)
def build_graph(self):
    """Render the per-quarter lines-of-code bar chart to ``bar_graph.png``.

    For every year and quarter in ``self.yearly_data`` the (at most) five
    languages with the highest non-zero value are stored under that
    quarter's ``'top'`` key. The chart groups the bars by year, draws one
    bar per quarter and colours the segments by language, using the colours
    from ``colors.json`` next to this file.

    Returns:
        str: the file name of the saved chart, ``'bar_graph.png'``.
    """
    with open(os.path.join(os.path.dirname(__file__), 'colors.json'),
              encoding='utf-8') as f:
        colors = json.load(f)

    max_languages = 5
    # Used for languages that have no colour in colors.json, so that the
    # colour list always stays aligned with the language list.
    fallback_color = '#cccccc'

    # filter data; the dict serves as an insertion-ordered set of languages
    top_languages = {}
    for quarters in self.yearly_data.values():
        for quarter_data in quarters.values():
            # Skip the 'top' entry left behind by an earlier call, otherwise
            # it would be sorted together with the per-language numbers.
            loc_by_language = {
                language: loc
                for language, loc in quarter_data.items()
                if language != 'top'
            }
            ranked = sorted(loc_by_language,
                            key=loc_by_language.get,
                            reverse=True)[:max_languages]
            # Always create 'top', even for a quarter without any language,
            # so the lookups below cannot fail.
            quarter_data['top'] = {
                language: loc_by_language[language]
                for language in ranked
                if loc_by_language[language] != 0
            }
            for language in quarter_data['top']:
                top_languages.setdefault(language, None)

    all_languages = list(top_languages)

    # Exactly one colour per language, in the same order as the domain.
    allColorsValues = [
        (colors.get(language) or {}).get('color') or fallback_color
        for language in all_languages
    ]

    years = list(self.yearly_data.keys())
    frames = []
    for language in all_languages:
        # One row per year with the four quarterly values. Quarter keys are
        # used as 1-based indexes, as before.
        rows = []
        for year in years:
            per_quarter = [0, 0, 0, 0]
            for quarter, quarter_data in self.yearly_data[year].items():
                per_quarter[quarter - 1] = quarter_data['top'].get(language, 0)
            rows.append(per_quarter)

        # Long format: c1 = year, c2 = quarter label, values = lines of code.
        frame = pd.DataFrame(rows,
                             index=years,
                             columns=["Q1", "Q2", "Q3", "Q4"])
        frame = frame.stack().reset_index()
        frame.columns = ['c1', 'c2', 'values']
        frame['Language'] = language
        frames.append(frame)

    df = pd.concat(frames)

    chart = alt.Chart(df).mark_bar().encode(
        # tell Altair which field to group columns on
        x=alt.X('c2:N', title=None),

        # tell Altair which field to use as Y values and how to calculate
        y=alt.Y('sum(values):Q',
                axis=alt.Axis(
                    grid=False,
                    title='Lines Of Code added')),

        # tell Altair which field to use as the set of columns to be
        # represented in each group
        column=alt.Column('c1:N', title=None),

        # tell Altair which field to use for color segmentation
        color=alt.Color('Language:N',
                        scale=alt.Scale(
                            domain=all_languages,
                            # make it look pretty with an enjoyable color palette
                            range=allColorsValues,
                        ),
                        )) \
        .configure_view(
        # remove grid lines around column clusters
        strokeOpacity=0
    )
    chart.save('bar_graph.png')
    return 'bar_graph.png'
def dead():
    """Plot COVID-19 related deaths and save the chart to ``graphs/dead.png``.

    Reads ``data/dead.csv`` (columns used: ``date``, ``new``, ``total``),
    re-indexes it onto a gap-free daily range starting 2020-03-07 and ending
    at the latest date in the file, drops rows dated after today, and draws a
    layered chart: bars for the daily count, a line for the 7-row rolling
    mean, and a line for the cumulative total on an independent y-scale.

    Any existing output file is removed before the new one is written.
    """
    data = "data/dead.csv"
    filename = "graphs/dead.png"
    if os.path.exists(filename):
        os.remove(filename)

    df = pd.read_csv(data)
    today = date.today()

    # Re-index on a continuous daily range so missing days become explicit rows.
    idx = pd.date_range("2020-03-07", df["date"].max())
    df.index = pd.DatetimeIndex(df["date"])
    df = df.reindex(idx)
    df["date"] = df.index
    df = df.reset_index(drop=True)
    df = df[df.date <= str(today)]

    df["new"] = df["new"].fillna(0).astype(int)
    # `.bfill()` is the non-deprecated spelling of `fillna(method="bfill")`.
    # NOTE(review): back-filling gives gap days the *next* reported total;
    # confirm this is intended for a cumulative series.
    df["total"] = df["total"].bfill().astype(int)
    df["new_sma7"] = df.new.rolling(window=7).mean()

    # Long format: one row per (date, category); the first six rolling-mean
    # rows are NaN and are removed by dropna().
    df = df.melt(
        id_vars=["date"],
        value_vars=["new", "new_sma7", "total"],
        var_name="category",
        value_name="value",
    ).dropna()

    rename = {"new": "New", "new_sma7": "Avg 7 d.", "total": "Cumulative"}
    df["category"] = df["category"].replace(rename)

    base = alt.Chart(df, title="COVID-19 related deaths (Source: FHI)").encode(
        alt.X("yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40))
    )

    bar = (
        base.transform_filter(alt.datum.category == "New")
        .mark_bar(color="#FFD1D1")
        .encode(y=alt.Y("value:Q", axis=alt.Axis(title="New per day", grid=True)))
    )

    # The cumulative line carries the colour scale, which defines the legend
    # entries for all three series.
    line = (
        base.transform_filter(alt.datum.category == "Cumulative")
        .mark_line(color="#2E507B", strokeWidth=3)
        .encode(
            y=alt.Y("value:Q", axis=alt.Axis(title="Cumulative")),
            color=alt.Color(
                "category:N",
                scale=alt.Scale(
                    domain=["New", "Avg 7 d.", "Cumulative"],
                    range=["#FFD1D1", "red", "#2E507B"],
                ),
                legend=alt.Legend(title=None),
            ),
        )
    )

    ma7 = (
        base.transform_filter(alt.datum.category == "Avg 7 d.")
        .mark_line(opacity=0.8)
        .encode(y=alt.Y("value:Q"), color=alt.Color("category:N"))
    )

    # Daily bars and their moving average share one y-scale; the cumulative
    # line gets its own.
    chart = (
        alt.layer(bar + ma7, line)
        .resolve_scale(y="independent")
        .properties(width=1200, height=600)
        .configure_legend(
            strokeColor="gray",
            fillColor="#FFFFFF",
            labelFontSize=12,
            symbolStrokeWidth=2,
            symbolSize=160,
            padding=6,
            cornerRadius=5,
            direction="horizontal",
            orient="none",
            legendX=480,
            legendY=655,
        )
    )

    chart.save(filename)
}, { "year": "2014", "population": 41777 }] source2 = [{ "start": "1933", "end": "1945", "event": "Nazi Rule" }, { "start": "1948", "end": "1989", "event": "GDR (East Germany)" }] source = alt.pd.DataFrame(source) source2 = alt.pd.DataFrame(source2) line = alt.Chart(source).mark_line(color='#333').encode( alt.X('year:T', axis=alt.Axis(format='%Y')), y='population').properties(width=600, height=400) point = line.mark_point(color='#333') rect = alt.Chart(source2).mark_rect().encode(x='start:T', x2='end:T', color='event:N') rect + line + point
def hospitalized():
    """Plot COVID-19 hospital patients and save to ``graphs/hospitalized.png``.

    Reads ``data/hospitalized.csv`` (columns used: ``date``, ``admissions``,
    ``icu``, ``respiratory``), re-indexes it onto a gap-free daily range from
    2020-03-08 until today, forward-fills missing days, and draws one
    semi-transparent, unstacked area per series.

    Any existing output file is removed before the new one is written.
    """
    data = "data/hospitalized.csv"
    filename = "graphs/hospitalized.png"

    # Single source of truth for the series: CSV column -> (legend label, colour).
    # The labels written into the data and the colour-scale domain must be the
    # same strings, otherwise a series loses its legend entry and colour.
    series = {
        "admissions": ("Hospitalized", "#5A9DFF"),
        "icu": ("Intensive Care", "#FF8B1B"),
        "respiratory": ("Respirator", "#FF642B"),
    }
    labels = {column: label for column, (label, _) in series.items()}

    if os.path.exists(filename):
        os.remove(filename)

    df = pd.read_csv(data)
    today = date.today()

    # Re-index on a continuous daily range so missing days become explicit rows.
    idx = pd.date_range("2020-03-08", today)
    df.index = pd.DatetimeIndex(df["date"])
    df = df.reindex(idx)
    df["date"] = df.index
    df = df.reset_index(drop=True)

    # Carry the last reported value forward over gap days.
    # `.ffill()` is the non-deprecated spelling of `fillna(method="ffill")`.
    for column in series:
        df[column] = df[column].ffill().astype(int)

    df_melt = pd.melt(
        df,
        id_vars=["date"],
        value_vars=list(series),
        value_name="value",
    )
    df_melt["variable"] = df_melt["variable"].replace(labels)

    chart = (
        alt.Chart(
            df_melt,
            title="Number of patients admitted to hospital with COVID-19 (Source: Helsedirektoratet)",
        )
        .mark_area(line={}, opacity=0.3)
        .encode(
            x=alt.X("yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40)),
            y=alt.Y(
                "value:Q",
                stack=None,
                title="Number of patients",
            ),
            color=alt.Color(
                "variable:N",
                scale=alt.Scale(
                    domain=[label for label, _ in series.values()],
                    range=[color for _, color in series.values()],
                ),
                legend=alt.Legend(title=None),
            ),
        )
        .properties(width=1200, height=600)
        .configure_legend(
            strokeColor="gray",
            fillColor="#FFFFFF",
            labelFontSize=12,
            symbolStrokeWidth=2,
            symbolSize=160,
            padding=6,
            cornerRadius=5,
            direction="horizontal",
            orient="none",
            legendX=480,
            legendY=655,
        )
    )

    chart.save(filename)
def outcome_bars(data, name=None, width=100):
    """
    Create a bar chart showing the percentage of hands won, lost, pushed, and
    surrendered.

    Arguments:
        data: one of
            * a DataFrame (a single game),
            * a list of dictionaries (converted to a single DataFrame), or
            * a list of items that are each converted with ``pd.DataFrame``
              (one game per item).
        name: label(s) used to identify each game in the chart; either a
            single string (for a single game) or a sequence with one label per
            game. Defaults to ``Game0``, ``Game1``, ...
        width: width passed to the Altair chart.

    Returns:
        An Altair chart faceted into one column per result, with one bar per
        game.

    Raises:
        TypeError: if ``data`` is neither a DataFrame nor a list.
        ValueError: if ``data`` is an empty list, or fewer names than games
            are given.
    """
    # display order of the result columns
    result_order = ["Win", "Loss", "Push", "Surrender"]

    if isinstance(data, pd.DataFrame):
        data_list = [data]
    elif isinstance(data, list):
        if not data:
            raise ValueError("'data' must not be an empty list")
        # a list of dictionaries, like player history, is a single game;
        # anything else is treated as a list of games
        if all(isinstance(item, dict) for item in data):
            data_list = [pd.DataFrame(data)]
        else:
            data_list = [pd.DataFrame(item) for item in data]
    else:
        msg = "'data' must be a DataFrame or list"
        raise TypeError(msg)

    # assign a name to every game; a bare string names a single game and must
    # not be iterated character by character
    if not name:
        names = [f"Game{i}" for i in range(len(data_list))]
    elif isinstance(name, str):
        names = [name]
    else:
        names = list(name)
    if len(names) < len(data_list):
        raise ValueError(
            f"got {len(names)} name(s) for {len(data_list)} game(s)"
        )

    # calculate percentages: one row per (game, result)
    plot_data_list = []
    for _name, _data in zip(names, data_list):
        win, loss, push, surrender = results_pct(_data, as_series=False)
        percentages = (win, loss, push, surrender)
        for order, (result, pct) in enumerate(
            zip(result_order, percentages), start=1
        ):
            plot_data_list.append(
                {"game": _name, "result": result, "pct": pct, "order": order}
            )

    plot_data = pd.DataFrame(plot_data_list)

    # create altair chart
    chart = alt.Chart(plot_data, width=width).mark_bar().encode(
        x=alt.X(
            "game",
            axis=alt.Axis(labelAngle=-45),
            title=None
        ),
        y=alt.Y(
            "pct:Q"
        ),
        color=alt.Color(
            "game:O",
            legend=None
        ),
        # the result ordering belongs on the facet that holds the results
        column=alt.Column(
            "result:O",
            title="Result",
            sort=result_order
        ),
        tooltip=[
            alt.Tooltip("pct", title="Pct")
        ]
    )

    return chart
def smittestopp():
    """Plot Smittestopp app figures and save to ``graphs/smittestopp.png``.

    Reads ``data/smittestopp.csv`` (columns used: ``date``, ``new_reported``,
    ``total_downloads``) and layers two marks on independent y-scales: bars
    for the reported infections and a semi-transparent area for the downloads.

    Any existing output file is removed before the new one is written.
    """
    csv_path = "data/smittestopp.csv"
    png_path = "graphs/smittestopp.png"
    reported_label = "Number of reported infections"
    downloads_label = "Number of downloads"

    if os.path.exists(png_path):
        os.remove(png_path)

    raw = pd.read_csv(csv_path)
    raw["date"] = pd.to_datetime(raw["date"])

    # Long format: one row per (date, category), empty cells dropped.
    long_df = raw.melt(
        id_vars=["date"],
        value_vars=["new_reported", "total_downloads"],
        var_name="category",
        value_name="value",
    ).dropna()
    long_df["category"] = long_df["category"].replace(
        {
            "new_reported": reported_label,
            "total_downloads": downloads_label,
        }
    )

    date_axis = alt.X(
        "yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40)
    )
    base = alt.Chart(
        long_df,
        title="Number of downloads of Smittestopp og number of reported infections through the app (Source: FHI)",
    ).encode(date_axis)

    # Area layer: downloads.
    downloads_y = alt.Y(
        "value:Q",
        axis=alt.Axis(title="Number of downloads", grid=True),
    )
    downloads = (
        base.transform_filter(alt.datum.category == downloads_label)
        .mark_area(line={}, color="#5BC1FF", opacity=0.2)
        .encode(y=downloads_y)
    )

    # Bar layer: reported infections; it also carries the shared colour
    # scale that produces the legend entries for both series.
    legend_scale = alt.Scale(
        domain=[downloads_label, reported_label],
        range=["#5BC1FF", "#FFA57E"],
    )
    reported = (
        base.transform_filter(alt.datum.category == reported_label)
        .mark_bar(color="#FFA57E")
        .encode(
            y=alt.Y("value:Q", axis=alt.Axis(title="Number of reported infections")),
            color=alt.Color(
                "category:N",
                scale=legend_scale,
                legend=alt.Legend(title=None),
            ),
        )
    )

    legend_style = dict(
        strokeColor="gray",
        fillColor="#FFFFFF",
        labelFontSize=12,
        symbolStrokeWidth=2,
        symbolSize=160,
        labelLimit=200,
        padding=6,
        cornerRadius=5,
        direction="horizontal",
        orient="none",
        legendX=390,
        legendY=660,
    )

    layered = alt.layer(reported, downloads).resolve_scale(y="independent")
    chart = layered.properties(width=1200, height=600).configure_legend(
        **legend_style
    )

    chart.save(png_path)
def labels(self, internal=False, **kwargs):
    """Parse label options and, when ``internal`` is true, build the label chart.

    ``kwargs`` are forwarded to ``self._parseArgs(call='labels', ...)``.
    With ``internal`` set, a text chart showing the last row of every
    ``variable`` group in ``self.source`` is built; if ``self.date_label`` is
    set, a second text chart carrying a time/date label is concatenated to
    its right.  The result is stored in ``self._labels`` and the ``'title'``
    entry is removed from ``self.prop``.

    Returns ``self``.

    NOTE(review): the branch nesting below was reconstructed from a
    whitespace-collapsed source -- confirm which ``if`` the final ``else``
    and the trailing statements belong to.
    """
    self._parseArgs(call='labels', **kwargs)
    if internal:
        # last row of each 'variable' group
        temp = self.source.groupby('variable').last().reset_index()
        if self.date_label:
            if type(self.date_label) == bool:
                # NOTE(review): when date_label is True but _datebased is
                # falsy, temp_time is never assigned and the `_width` line
                # below fails -- confirm callers never hit this.
                if self._datebased:
                    max_time = temp['x'].max()
                    if self.timezone:
                        # parse as UTC, then show the naive timestamp
                        # followed by the timezone name
                        formatted_time = pd.to_datetime(
                            max_time, infer_datetime_format=True, utc=True)
                        _formatted_time = str(
                            formatted_time.tz_localize(None)) + ' ' + str(
                                formatted_time.tzinfo)
                    else:
                        formatted_time = pd.to_datetime(
                            max_time, infer_datetime_format=True)
                        _formatted_time = str(
                            formatted_time.tz_localize(None))
                    temp_time = pd.DataFrame([{
                        'variable': 'Time',
                        'x': max_time,
                        'value': _formatted_time
                    }])
            elif type(self.date_label) == str:
                if self.date_label in {'Date', 'date', 'days', 'day'}:
                    # date part only
                    max_time = temp['x'].max()
                    formatted_time = pd.to_datetime(
                        max_time, infer_datetime_format=True).date()
                    temp_time = pd.DataFrame([{
                        'variable': 'Time',
                        'x': max_time,
                        'value': str(formatted_time)
                    }])
                elif self.date_label in {'Time', 'time', 'hour', 'hours'}:
                    # time-of-day part only
                    max_time = temp['x'].max()
                    formatted_time = pd.to_datetime(
                        max_time, infer_datetime_format=True).time()
                    temp_time = pd.DataFrame([{
                        'variable': 'Time',
                        'x': max_time,
                        'value': str(formatted_time)
                    }])
                else:
                    # any other string is displayed verbatim
                    temp_time = pd.DataFrame([{
                        'variable': ' ',
                        'x': str(self.date_label),
                        'value': str(self.date_label)
                    }])
            else:
                raise ValueError('Unsupported date_label argument.')
            # width of the time label grows with the length of its text;
            # the value labels get the remaining width
            _width = 80 + 4 * int(len(temp_time['value'].values[0]))
            width = self.prop.get('width')
            width1, width2 = width - _width, _width
            labels = alt.Chart(temp).mark_text(**self.textMark).encode(
                x=alt.X('variable:O',
                        axis=alt.Axis(**self.labelsAxis),
                        title=None),
                text=alt.Text('value:Q',
                              format=self.format)).properties(width=width1,
                                                              height=30,
                                                              title='')
            time_label = alt.Chart(temp_time).mark_text(
                **self.textMark).encode(
                    x=alt.X('variable:O',
                            axis=alt.Axis(**self.labelsAxis),
                            title=None),
                    text=alt.Text('value:O', )).properties(width=width2,
                                                           height=30,
                                                           title='')
            # the title is attached to the concatenated chart
            labels = alt.hconcat(labels,
                                 time_label,
                                 spacing=0,
                                 title=alt.TitleParams(text=self.prop.get(
                                     'title', 'Title Needed'),
                                                       anchor='middle'))
        else:
            # no time label: a single text chart carrying the title itself
            temp = self.source.groupby('variable').last().reset_index()
            labels = alt.Chart(temp).mark_text(**self.textMark).encode(
                x=alt.X('variable:O',
                        axis=alt.Axis(**self.labelsAxis),
                        title=None),
                text=alt.Text('value:Q', format=self.format)).properties(
                    width=self.prop.get('width'),
                    height=30,
                    title=self.prop.get('title', 'Title Needed'))
        # the title now lives on the label chart; drop it from the shared props
        if self.prop.get('title'):
            self.prop.pop('title')
        self._labels = labels
    return self
def plot_interactive(
    self,
    x_axis: Union[int, str, Embedding] = 0,
    y_axis: Union[int, str, Embedding] = 1,
    axis_metric: Optional[Union[str, Callable, Sequence]] = None,
    x_label: Optional[str] = None,
    y_label: Optional[str] = None,
    title: Optional[str] = None,
    annot: bool = True,
    color: Union[None, str] = None,
):
    """
    Makes highly interactive plot of the set of embeddings.

    Arguments:
        x_axis: the x-axis to be used, must be given when dim > 2; if an integer, the corresponding
            dimension of embedding is used.
        y_axis: the y-axis to be used, must be given when dim > 2; if an integer, the corresponding
            dimension of embedding is used.
        axis_metric: the metric used to project each embedding on the axes; only used when the corresponding
            axis (i.e. `x_axis` or `y_axis`) is a string or an `Embedding` instance. It could be a string
            (`'cosine_similarity'`, `'cosine_distance'` or `'euclidean'`), or a callable that takes two vectors as input
            and returns a scalar value as output. To set different metrics for x- and y-axis, a list or a tuple of
            two elements could be given. By default (`None`), normalized scalar projection (i.e. `>` operator) is used.
        x_label: an optional label used for x-axis; if not given, it is set based on `x_axis` value.
        y_label: an optional label used for y-axis; if not given, it is set based on `y_axis` value.
        title: an optional title for the plot; if not given, it is set based on `x_axis` and `y_axis` values.
        annot: drawn points should be annotated
        color: a property that will be used for plotting

    Returns:
        The interactive Altair scatter chart; when `annot` is true it is
        layered with a text chart that labels every point.

    **Usage**

    ```python
    from whatlies.language import SpacyLanguage

    words = ["prince", "princess", "nurse", "doctor", "banker", "man", "woman",
             "cousin", "niece", "king", "queen", "dude", "guy", "gal", "fire",
             "dog", "cat", "mouse", "red", "blue", "green", "yellow", "water",
             "person", "family", "brother", "sister"]

    lang = SpacyLanguage("en_core_web_sm")
    emb = lang[words]

    emb.plot_interactive('man', 'woman')
    ```
    """
    # A string axis refers to an embedding of this set by name.
    if isinstance(x_axis, str):
        x_axis = self[x_axis]
    if isinstance(y_axis, str):
        y_axis = self[y_axis]

    # A list/tuple supplies separate metrics for the x- and y-axis.
    if isinstance(axis_metric, (list, tuple)):
        x_axis_metric = axis_metric[0]
        y_axis_metric = axis_metric[1]
    else:
        x_axis_metric = axis_metric
        y_axis_metric = axis_metric

    # Determine axes values and labels
    if isinstance(x_axis, int):
        x_val = self.to_X()[:, x_axis]
        x_lab = "Dimension " + str(x_axis)
    else:
        x_axis_metric = Embedding._get_plot_axis_metric_callable(x_axis_metric)
        x_val = self.compare_against(x_axis, mapping=x_axis_metric)
        x_lab = x_axis.name

    if isinstance(y_axis, int):
        y_val = self.to_X()[:, y_axis]
        y_lab = "Dimension " + str(y_axis)
    else:
        y_axis_metric = Embedding._get_plot_axis_metric_callable(y_axis_metric)
        y_val = self.compare_against(y_axis, mapping=y_axis_metric)
        y_lab = y_axis.name

    x_label = x_label if x_label is not None else x_lab
    y_label = y_label if y_label is not None else y_lab
    title = title if title is not None else f"{x_lab} vs. {y_lab}"

    plot_df = pd.DataFrame(
        {
            "x_axis": x_val,
            "y_axis": y_val,
            "name": [v.name for v in self.embeddings.values()],
            "original": [v.orig for v in self.embeddings.values()],
        }
    )

    if color:
        # Embeddings lacking the requested property get an empty value.
        plot_df[color] = [
            getattr(v, color) if hasattr(v, color) else ""
            for v in self.embeddings.values()
        ]

    result = (
        alt.Chart(plot_df)
        .mark_circle(size=60)
        .encode(
            x=alt.X("x_axis", axis=alt.Axis(title=x_label)),
            # the y channel must be built with alt.Y, not alt.X
            y=alt.Y("y_axis", axis=alt.Axis(title=y_label)),
            tooltip=["name", "original"],
            color=alt.Color(":N", legend=None) if not color else alt.Color(color),
        )
        .properties(title=title)
        .interactive()
    )

    if annot:
        text = (
            alt.Chart(plot_df)
            .mark_text(dx=-15, dy=3, color="black")
            .encode(
                x="x_axis",
                y="y_axis",
                text="original",
            )
        )
        result = result + text
    return result