def geojson_feature(data, feature='features', **kwargs):
    r"""Extract features from a GeoJSON object or URL.

    Parameters
    ----------
    data : anyOf(string, geojson.GeoJSON)
        A string is interpreted as the URL from which to load the data set.
        An object exposing ``__geo_interface__`` is interpreted as the data
        set itself.
    feature : string
        The JSON property containing the GeoJSON object set to convert to
        a GeoJSON feature collection. For example ``features[0].geometry``.
    \**kwargs :
        Additional keywords passed to ``alt.JsonDataFormat``.

    Returns
    -------
    ``alt.UrlData`` when ``data`` is a string, ``alt.InlineData`` when
    ``data`` exposes ``__geo_interface__``. Any other input triggers a
    warning and is returned unchanged.
    """
    # NOTE: the docstring is a raw string so that ``\**kwargs`` is not an
    # invalid escape sequence.

    # A string is treated as a URL: the data is loaded lazily by the renderer.
    if isinstance(data, six.string_types):
        return alt.UrlData(
            url=data,
            format=alt.JsonDataFormat(type='json', property=feature, **kwargs))

    # Anything implementing the geo interface is embedded inline.
    if hasattr(data, '__geo_interface__'):
        if isinstance(data, gpd.GeoDataFrame):
            # GeoDataFrames are sanitized before their geo interface is read.
            data = alt.utils.sanitize_dataframe(data)
        return alt.InlineData(
            values=data.__geo_interface__,
            format=alt.JsonDataFormat(type='json', property=feature, **kwargs))

    # Unrecognized input: warn and hand it back untouched.
    warnings.warn("data of type {0} not recognized".format(type(data)))
    return data
def make_heatmap(year_slider=(1950, 2000), genre="Comedy"):
    '''
    Build a heatmap of box office profit ratio against IMDb ratings,
    restricted to one genre and a range of release years.

    Inputs :
        year_slider : Sequence with 2 values, the first and last release
                      year to keep (both inclusive), e.g. [1970, 1985]
        genre : Value of the Major_Genre field to keep, e.g. 'Drama'

    Returns
        A heatmap Altair object
    '''
    # The default for year_slider is an immutable tuple; only indices 0 and 1
    # are read, so lists passed by callers behave exactly as before.
    first_year = year_slider[0]
    last_year = year_slider[1]

    df = alt.UrlData(data.movies.url)

    x_rating = alt.X('IMDB_Rating:Q',
                     bin=alt.Bin(maxbins=20),
                     axis=alt.Axis(title='IMDB Rating'))
    y_ratio = alt.Y('Profit_Ratio:Q',
                    bin=alt.Bin(maxbins=20),
                    axis=alt.Axis(
                        title='Profit Ratio ((Gross - Budget)/Budget)'))
    cell_color = alt.Color('mean(Profit_Ratio):Q',
                           scale=alt.Scale(scheme='yelloworangebrown'),
                           legend=alt.Legend(orient="bottom"))

    heatmap = (
        alt.Chart(df)
        .mark_rect()
        # Keep only positive budgets before dividing by the budget below.
        .transform_filter(alt.datum.Production_Budget > 0)
        .transform_calculate(
            Release_Year="year(datum.Release_Date)",
            Profit="datum.Worldwide_Gross - datum.Production_Budget",
            Profit_Ratio="datum.Profit / datum.Production_Budget")
        .transform_filter(alt.datum.Release_Year <= last_year)
        .transform_filter(alt.datum.Release_Year >= first_year)
        # Only profit ratios below 10 are plotted.
        .transform_filter(alt.datum.Profit_Ratio < 10)
        .transform_filter(alt.datum.Major_Genre == genre)
        .encode(x_rating, y_ratio, cell_color, tooltip=['Title:N'])
        .properties(title="Profit Ratio against IMDb Ratings",
                    height=300,
                    width=300)
    )
    return heatmap
def make_areachart(year_slider=(1950, 2000), genre="Comedy"):
    '''
    Build an area chart of the mean US and international gross per release
    year, restricted to one genre and a range of release years.

    Inputs :
        year_slider : Sequence with 2 values, the first and last release
                      year to keep (both inclusive), e.g. [1970, 1985]
        genre : Value of the Major_Genre field to keep, e.g. 'Drama'

    Returns
        An area chart Altair object
    '''
    # The default for year_slider is an immutable tuple; only indices 0 and 1
    # are read, so lists passed by callers behave exactly as before.
    first_year = year_slider[0]
    last_year = year_slider[1]

    df = alt.UrlData(data.movies.url)

    y_dollars = alt.Y('value:Q',
                      aggregate="mean",
                      axis=alt.Axis(title='Dollars'))
    x_year = alt.X('Release_Year:O', axis=alt.Axis(title='Year'))
    series_color = alt.Color("key:N",
                             legend=alt.Legend(orient="bottom"),
                             scale=alt.Scale(scheme='yelloworangebrown'))

    areachart = (
        alt.Chart(df)
        .mark_area()
        .transform_calculate(
            Release_Year="year(datum.Release_Date)",
            International_Gross="datum.Worldwide_Gross - datum.US_Gross")
        .transform_filter(alt.datum.Release_Year <= last_year)
        .transform_filter(alt.datum.Release_Year >= first_year)
        .transform_filter(alt.datum.Major_Genre == genre)
        # Fold the two gross columns into long form; the encodings below
        # read the folded data through the `key` and `value` fields.
        .transform_fold(['International_Gross', 'US_Gross'])
        .encode(y_dollars, x_year, color=series_color)
        .properties(title="Average Gross", height=300, width=300)
    )
    return areachart
def make_highlight_hist(year_slider=(1950, 2000), genre="Comedy"):
    '''
    Build a bar chart counting movies per genre within a range of release
    years, with the bar of the selected genre highlighted.

    Inputs :
        year_slider : Sequence with 2 values, the first and last release
                      year to keep (both inclusive), e.g. [1970, 1985]
        genre : Value of the Major_Genre field whose bar is drawn in
                orange (all other bars are gray), e.g. 'Drama'

    Returns
        A histogram Altair object
    '''
    # The default for year_slider is an immutable tuple; only indices 0 and 1
    # are read, so lists passed by callers behave exactly as before.
    first_year = year_slider[0]
    last_year = year_slider[1]

    df = alt.UrlData(data.movies.url)

    # Orange for the selected genre, gray for everything else.
    bar_color = alt.condition(alt.datum.Major_Genre == genre,
                              alt.value("orange"),
                              alt.value("gray"))

    genre_hist = (
        alt.Chart(df)
        .mark_bar()
        .transform_calculate(
            Release_Year="year(datum.Release_Date)",
            International_Gross="datum.Worldwide_Gross - datum.US_Gross")
        .transform_filter(alt.datum.Release_Year <= last_year)
        # `!= None` is intentional: the comparison on alt.datum is handed to
        # transform_filter as a filter expression, so `is not None` (which
        # evaluates to a plain Python bool) would not be equivalent.
        .transform_filter(alt.datum.Major_Genre != None)  # noqa: E711
        .transform_filter(alt.datum.Release_Year >= first_year)
        .encode(alt.Y('Major_Genre:N', axis=alt.Axis(title='Genre')),
                alt.X('count()'),
                color=bar_color)
        .properties(title="Histogram of Genres", width=300, height=300)
    )
    return genre_hist
"""
Repeated Choropleth Map
=======================
Three choropleths representing disjoint data from the same table.
"""
# category: geographic
import altair as alt
from vega_datasets import data

# Columns pulled in by the lookup; each one also drives one row of the repeat.
variable_list = ['population', 'engineers', 'hurricanes']

# Table that provides the columns listed above, joined on its 'id' field.
pop_eng_hur = alt.UrlData(data.population_engineers_hurricanes.url)

# State shapes: the 'states' feature of the us_10m TopoJSON file.
states = alt.UrlData(
    data.us_10m.url,
    format=alt.TopoDataFormat(type='topojson', feature='states'))

chart = (
    alt.Chart(states)
    .mark_geoshape()
    .properties(projection={'type': 'albersUsa'}, width=500, height=300)
    # Attach the table columns to each shape by matching 'id'.
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(pop_eng_hur, 'id', variable_list))
    # The colour field is whichever variable the current row repeats.
    .encode(color=alt.Color(alt.repeat('row'), type='quantitative'))
    .repeat(row=variable_list)
    # Each map gets its own colour scale.
    .resolve_scale(color='independent')
)
"""
Repeated Choropleth Map
=======================
Three choropleths representing disjoint data from the same table.
"""
# category: geographic
import altair as alt
from vega_datasets import data

# Columns pulled in by the lookup; each one also drives one row of the repeat.
variable_list = ['population', 'engineers', 'hurricanes']

# Table that provides the columns listed above, joined on its 'id' field.
pop_eng_hur = alt.UrlData(data.population_engineers_hurricanes.url)

# State shapes taken from the 'states' feature of the us_10m file.
states = alt.topo_feature(data.us_10m.url, 'states')

# The chart is left as the final bare expression of the script.
(
    alt.Chart(states)
    .mark_geoshape()
    .properties(projection={'type': 'albersUsa'}, width=500, height=300)
    # The colour field is whichever variable the current row repeats.
    .encode(alt.Color(alt.repeat('row'), type='quantitative'))
    # Attach the table columns to each shape by matching 'id'.
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(pop_eng_hur, 'id', variable_list))
    .repeat(row=variable_list)
    # Each map gets its own colour scale.
    .resolve_scale(color='independent')
)
""" Interactive Crossfilter ======================= This example shows a multi-panel view of the same data, where you can interactively select a portion of the data in any of the panels to highlight that portion in any of the other panels. """ # category: interactive charts import altair as alt from vega_datasets import data flights = alt.UrlData(data.flights_2k.url, format={'parse': {'date': 'date'}}) brush = alt.selection(type='interval', encodings=['x']) # Define the base chart, with the common parts of the # background and highlights base = alt.Chart().mark_bar().encode(x=alt.X(alt.repeat('column'), type='quantitative', bin=alt.Bin(maxbins=20)), y='count()').properties(width=180, height=130) # blue background with selection background = base.properties(selection=brush) # yellow highlights on the transformed data highlight = base.encode(color=alt.value('goldenrod')).transform_filter(brush) # layer the two charts & repeat alt.layer(background, highlight, data=flights).transform_calculate(
"""
U.S. state capitals overlaid on a map of the U.S
------------------------------------------------
This is a layered geographic visualization that shows US capitals
overlaid on a map.
"""
# category: geographic
import altair as alt
from vega_datasets import data

# State shapes: the 'states' feature of the us_10m TopoJSON file.
states = alt.UrlData(
    data.us_10m.url,
    format=alt.TopoDataFormat(type='topojson', feature='states'))
capitals = data.us_state_capitals.url

# US states background
background = alt.Chart(states).mark_geoshape(
    fill='lightgray',
    stroke='white'
).properties(
    # "Capitals" (the cities in the dataset), not "Capitols" (the buildings).
    title='US State Capitals',
    projection={'type': 'albersUsa'},
    width=700,
    height=400
)

# Points and text
# Single selection driven by mouseover; `nearest=True` is set and the
# selection is keyed on the 'lat'/'lon' fields.
hover = alt.selection(type='single', on='mouseover', nearest=True,
                      fields=['lat', 'lon'])
import altair as alt from vega_datasets import data alt.renderers.enable('altair_viewer') movies = alt.UrlData(data.movies.url, format=alt.DataFormat(parse={"Release_Date": "date"})) ratings = ['G', 'NC-17', 'PG', 'PG-13', 'R'] genres = [ 'Action', 'Adventure', 'Black Comedy', 'Comedy', 'Concert/Performance', 'Documentary', 'Drama', 'Horror', 'Musical', 'Romantic Comedy', 'Thriller/Suspense', 'Western' ] base = alt.Chart(movies, width=200, height=200).mark_point(filled=True).transform_calculate( Rounded_IMDB_Rating="floor(datum.IMDB_Rating)", Hundred_Million_Production= "datum.Production_Budget > 100000000.0 ? 100 : 10", Release_Year="year(datum.Release_Date)").transform_filter( alt.datum.IMDB_Rating > 0).transform_filter( alt.FieldOneOfPredicate( field='MPAA_Rating', oneOf=ratings)).encode(x=alt.X( 'Worldwide_Gross:Q', scale=alt.Scale(domain=(100000, 10**9), clamp=True)), y='IMDB_Rating:Q', tooltip="Title:N") # A slider filter year_slider = alt.binding_range(min=1969, max=2018, step=1) slider_selection = alt.selection_single(bind=year_slider,
""" London Tube Lines ================= This example shows the London tube lines against the background of the borough boundaries. It is based on the vega-lite example at https://vega.github.io/vega-lite/examples/geo_layer_line_london.html. """ # category: geographic import altair as alt from vega_datasets import data boroughs = alt.UrlData(url=data.londonBoroughs.url, format=alt.TopoDataFormat(type='topojson', feature='boroughs')) centroids = data.londonCentroids.url tubelines = alt.UrlData(url=data.londonTubeLines.url, format=alt.TopoDataFormat(type='topojson', feature='line')) background = alt.Chart(boroughs).mark_geoshape( stroke='white', strokeWidth=2).encode(color=alt.value('#eee'), ).properties(width=700, height=500) labels = alt.Chart(centroids).mark_text().encode( longitude='cx:Q', latitude='cy:Q', text='bLabel:N', size=alt.value(8), opacity=alt.value(0.6)
"""
Choropleth Map
==============
A choropleth map of unemployment rate per county in the US
"""
# category: geographic
import altair as alt
from vega_datasets import data

# Table providing the 'rate' column, joined on its 'id' field.
unemp_data = alt.UrlData(data.unemployment.url)

# County shapes taken from the 'counties' feature of the us_10m file.
counties = alt.topo_feature(data.us_10m.url, 'counties')

# The chart is left as the final bare expression of the script.
(
    alt.Chart(counties)
    .mark_geoshape()
    .properties(projection={'type': 'albersUsa'}, width=500, height=300)
    .encode(color='rate:Q')
    # Attach 'rate' to each shape by matching 'id'.
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(unemp_data, 'id', ['rate']))
)
import numpy as np
import pandas as pd
import altair as alt
#import altair.vega.v5 as alt
from vega_datasets import data
# Streamlit is bound to `st`: binding it to `alt` would shadow altair and
# leave the `st.altair_chart` call below undefined.
import streamlit as st

# Small example table; it is not referenced by the chart below, which loads
# cars.json by URL instead.
source = pd.DataFrame({
    'a': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I'],
    'b': [28, 55, 43, 91, 81, 53, 19, 87, 52]
})

# Point chart assembled from explicit schema objects: Horsepower on x,
# Miles_per_Gallon on y, coloured by Origin.
chart = alt.Chart(
    data=alt.UrlData(
        url='https://vega.github.io/vega-datasets/data/cars.json'),
    mark='point',
    encoding=alt.FacetedEncoding(
        x=alt.PositionFieldDef(field='Horsepower', type='quantitative'),
        y=alt.PositionFieldDef(field='Miles_per_Gallon',
                               type='quantitative'),
        color=alt.StringFieldDefWithCondition(field='Origin',
                                              type='nominal')),
    config=alt.Config(
        view=alt.ViewConfig(continuousHeight=300, continuousWidth=400)))

# Hand the chart to Streamlit.
st.altair_chart(chart)
"""
Choropleth Map
==============
A choropleth map of unemployment rate per county in the US
"""
# category: geographic
import altair as alt
from vega_datasets import data

# County shapes: the 'counties' feature of the us_10m TopoJSON file.
counties = alt.UrlData(
    data.us_10m.url,
    format=alt.TopoDataFormat(type='topojson', feature='counties'))

# Table providing the 'rate' column, joined on its 'id' field.
unemp_data = alt.UrlData(data.unemployment.url)

chart = (
    alt.Chart(counties)
    .mark_geoshape()
    .properties(projection={'type': 'albersUsa'}, width=500, height=300)
    .encode(color='rate:Q')
    # Attach 'rate' to each shape by matching 'id'.
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(unemp_data, 'id', ['rate']))
)
"""
Cumulative Wikipedia Donations
==============================
This chart shows cumulative donations to Wikipedia over the past 10 years.
This chart was inspired by
https://www.reddit.com/r/dataisbeautiful/comments/7guwd0/cumulative_wikimedia_donations_over_the_past_10/
but using lines instead of areas. Data comes from
https://frdata.wikimedia.org/.
"""
import altair as alt

data = alt.UrlData("https://frdata.wikimedia.org/donationdata-vs-day.csv")

chart = alt.Chart(data).mark_line().encode(
    # Month/day of the donation date along x, labelled by month name.
    x=alt.X('date:T',
            timeUnit='monthdate',
            axis=alt.Axis(format='%B', title='Month')),
    y=alt.Y('max(ytdsum):Q',
            stack=None,
            axis=alt.Axis(title='Cumulative Donations')),
    # One line per year of the donation date.
    color=alt.Color('date:O',
                    timeUnit='year',
                    legend=alt.Legend(title='Year')),
    # Order by year of the same `date` field the other channels use
    # (`data` is only the name of the Python variable above, not a column).
    order=alt.Order('date:O', timeUnit='year'))