def barChart(self, Model):
    """Build a faceted bar chart of predicted prices and return it as JSON.

    A grid of (year, condition) combinations is generated around
    ``self.year``; every other feature column is filled with the value from
    the first row of ``self.df``.  ``Model.model.predict`` is then called on
    that grid and the predictions are drawn as one bar per condition, with
    one facet column per year.

    Parameters
    ----------
    Model : object
        Object exposing ``Model.model.predict(df)``.  Presumably a fitted
        pipeline that accepts the eight feature columns selected below --
        verify against the caller.

    Returns
    -------
    str
        The chart specification produced by ``Chart.to_json(indent=None)``.
    """
    conditions = ['fair', 'good', 'excellent', 'like new']
    # Two years back, up to two years ahead, but never beyond the current
    # calendar year (the upper bound of ``range`` is exclusive).
    years = range(self.year - 2,
                  min(self.year + 3, datetime.now().year + 1))

    df = pd.DataFrame(list(product(years, conditions)),
                      columns=['year', 'condition'])

    # Broadcast the remaining features from the first row of ``self.df``.
    # ``.iloc[0]`` is positional, so this works whatever the index labels
    # are; ``[0]`` was a label lookup and raised KeyError when no row was
    # labelled 0.
    for col in set(self.df.columns).difference(df.columns):
        df[col] = self.df[col].iloc[0]

    cols = [
        'state', 'year', 'manufacturer', 'model', 'odometer',
        'transmission', 'condition', 'BaseMSRP'
    ]
    df = df[cols]
    df['price'] = Model.model.predict(df)

    chart = alt.Chart(df).mark_bar().encode(
        x=alt.X('condition:O', sort=conditions),
        y='price:Q',
        color=alt.Color('condition:O',
                        sort=conditions,
                        scale=alt.Scale(scheme='purplegreen')),
        column='year:N',
    ).properties(
        height=240,
        width=140,
        padding=10,
        autosize=alt.AutoSizeParams(type='fit', contains='padding'),
    )
    return chart.to_json(indent=None)
def _rankingmap_altair(countries,
                       ranking,
                       x,
                       scenario=None,
                       method='number',
                       title='',
                       label=''):
    """Build a choropleth world map coloured by country ranking.

    Adapted from https://altair-viz.github.io/gallery/index.html

    Parameters
    ----------
    countries : object
        Geographic features passed to ``alt.Data(values=...)`` and to
        ``get_ranking_data``.  Each feature is looked up through its
        ``properties.ISIPEDIA`` field.
    ranking : object
        Ranking object; ``ranking.plot_title`` is the default chart title.
    x, scenario :
        Forwarded unchanged to ``get_ranking_data``.
    method : {'number', 'value'}
        ``'number'`` colours by the ``Rank`` field (ascending),
        ``'value'`` colours by the ``Value`` field (descending).
    title : str, optional
        Chart title.  Falls back to ``ranking.plot_title`` when empty.
    label : str, optional
        Currently unused.

    Returns
    -------
    alt.Chart

    Raises
    ------
    ValueError
        If ``method`` is neither ``'number'`` nor ``'value'``.
    """
    import altair as alt

    if method not in ('number', 'value'):
        raise ValueError('method must be "number" or "value"')

    source = alt.Data(values=countries)
    ranking_data = get_ranking_data(countries, ranking, x, scenario, method)

    if method == 'number':
        color = alt.Color("Rank:Q", sort='ascending')
    else:
        color = alt.Color("Value:Q", sort='descending')

    chart = alt.Chart(source).mark_geoshape().encode(
        color=color,
        tooltip=[
            "label:N", "unit:N", "Country:N", "Code:N", "Value:Q", "Rank:Q"
        ],
    ).transform_lookup(
        # Join every map feature with the ranking row whose 'Code' matches.
        lookup='properties.ISIPEDIA',
        from_=alt.LookupData(ranking_data, 'Code',
                             ranking_data.columns.tolist()),
    ).project('naturalEarth1').properties(
        width=800,
        autosize=alt.AutoSizeParams(contains="padding", type="fit-x"),
        # Honour an explicit title, as the line-plot helpers do.
        title=title or ranking.plot_title,
    ).configure(background='#F1F4F4').configure_title(
        fontSize=16,
    ).configure_axis(
        labelFontSize=14,
        titleFontSize=16,
    ).configure_legend(
        titleFontSize=14,
        labelFontSize=14,
    ).configure_mark(fontSize=14)

    return chart
def _countrymap_altair(mapdata, countrymasksnc, jsfile, x=None, scenario=None, climate=None, impact=None, title='', label=''):
    """Render a gridded map for one area as an Altair heat map.

    The field returned by ``mapdata.get`` is cropped to the bounds of
    ``jsfile.area``, masked, converted to a (lon, lat, z) table and drawn
    with ``mark_rect``.

    Parameters
    ----------
    mapdata : object
        Must provide ``bounds(area)`` and
        ``get(name, x, scenario, climate, impact)``.
    countrymasksnc : object
        Must provide ``variables`` and item access for ``'lon'``, ``'lat'``
        and the ``'m_<area>'`` masks.  Presumably an open netCDF dataset --
        TODO confirm.
    jsfile : object
        Supplies ``area``, ``filename``, ``plot_title``, ``plot_label_y``
        and ``plot_unit_y``.
    x, scenario, climate, impact :
        Forwarded unchanged to ``mapdata.get``.
    title, label : str, optional
        Currently unused: the chart title is always ``jsfile.plot_title``
        and the colour legend title is always empty.

    Returns
    -------
    alt.Chart
        Interactive chart, 800 px wide, with height ``int(800 * aspect)``.

    Notes
    -----
    ``os`` is not imported here and must be available at module level.
    The computed aspect ratio is printed to stdout on every call.
    """
    import altair as alt
    import numpy as np
    import pandas as pd
    area = jsfile.area
    # File stem without extension; ``ext`` is not used afterwards.
    name, ext = os.path.splitext(os.path.basename(jsfile.filename))
    # Drop a trailing "<one character><area>" suffix from the stem, if any.
    if name.endswith(area):
        name = name[:-len(area) - 1]
    bnds = mapdata.bounds(area)
    worldmap = mapdata.get(name, x, scenario, climate, impact)
    localmap = bnds.extract(worldmap)
    if 'm_' + area in countrymasksnc.variables:
        # Dedicated mask for this area: keep cells where it is positive.
        mask = bnds.extract(countrymasksnc['m_' + area]) > 0
    elif area == 'world':
        # No dedicated mask: take the union of every 'm_*' mask.
        mask = np.zeros_like(worldmap, dtype=bool)
        for k in countrymasksnc.variables:
            if not k.startswith('m_'):
                continue
            mask[countrymasksnc[k][:] > 0] = True
    else:
        # NOTE(review): this mask has the shape of ``worldmap`` whereas it is
        # later used to index ``localmap`` (the cropped array). That looks
        # like it only works when both shapes agree -- confirm.
        mask = np.ones_like(worldmap, dtype=bool)
    # Reverse both arrays along their first axis.
    localmap = localmap[::-1]
    mask = mask[::-1]
    # ni/nj are only referenced by the commented-out linspace variant below.
    ni, nj = localmap.shape
    # l, r, b, t = bnds.extent
    # x = np.linspace(l, r, nj)
    # y = np.linspace(t, b, ni)
    l, r, b, t = bnds.indices
    # From here on ``x`` no longer holds the argument passed by the caller.
    x = countrymasksnc['lon'][l:r + 1]
    y = countrymasksnc['lat'][t:b + 1][::-1]
    X, Y = np.meshgrid(x, y)
    # l, r, b, t are rebound from index positions to ``bnds.extent``.
    l, r, b, t = bnds.extent
    # Height/width ratio divided by the cosine of the mean of t and b
    # (presumably a mid-latitude correction in degrees -- confirm).
    aspect = (t - b) / (r - l) / np.cos(np.deg2rad((t + b) / 2))
    print(jsfile.area, 'aspect', aspect)
    # Convert this grid to columnar data expected by Altair
    source = pd.DataFrame({
        'lon': X[mask].round(2),
        'lat': Y[mask].round(2),
        'z': localmap[mask]
    })
    chart = alt.Chart(source).mark_rect().encode(
        x='lon:O',
        y=alt.Y('lat:O', sort='descending'),
        color=alt.Color('z:Q', title=''),
        tooltip=[
            alt.Tooltip('z:Q', title='{} ({})'.format(jsfile.plot_label_y, jsfile.plot_unit_y)),
            'lon:Q', 'lat:Q'
        ]).properties(
            title=jsfile.plot_title,
            width=800,
            height=int(800 * aspect),
            autosize=alt.AutoSizeParams(contains="padding", type="fit-x"),
        ).configure(background='#F1F4F4').configure_header(
            titleFont="IBM Plex Sans",
            titleFontSize=20,
            labelFont="IBM Plex Sans",
            labelFontSize=18,
        ).configure_title(fontSize=16, ).configure_axis(
            labelFontSize=14,
            titleFontSize=16,
        ).configure_legend(
            titleFontSize=14,
            labelFontSize=14,
        ).configure_mark(fontSize=14).interactive()
    return chart
def _lineplot_altair_temp_advanced(data,
                                   x=None,
                                   scenario=None,
                                   climate=None,
                                   impact=None,
                                   shading=False,
                                   title='',
                                   xlabel='',
                                   ylabel=''):
    """Build the detailed line plot with one line per climate/impact pair.

    All lines returned by ``data.filter_lines()`` are stacked into one
    table.  Rows whose ``climate`` equals ``"median"`` are excluded and the
    rest is drawn as a min/max band per climate model, thin lines, points
    and hover labels.  Clicking a legend entry toggles a climate model.

    Parameters
    ----------
    data : object
        Must provide ``filter_lines()``, ``x``, ``plot_title``,
        ``plot_label_x``, ``plot_unit_x`` and ``plot_unit_y``.
    title : str, optional
        Chart title.  Falls back to ``data.plot_title`` when empty.
    xlabel : str, optional
        X-axis title.  Falls back to
        ``"<plot_label_x> (<plot_unit_x>)"`` when empty.
    x, scenario, climate, impact, shading, ylabel :
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    alt.LayerChart
        Result of ``configure_chart(chart).interactive()``.
    """
    import pandas as pd
    import altair as alt

    df = pd.concat([pd.DataFrame(l) for l in data.filter_lines()])
    df["model"] = df.climate + " / " + df.impact
    # Divide by 100 so we can percent-format in the plot
    df.y = df.y / 100

    # Legend click toggles whole climate models; hover picks nearest point.
    selection_climate = alt.selection_multi(fields=['climate'],
                                            bind='legend')
    nearest = alt.selection(type='single',
                            nearest=True,
                            on='mouseover',
                            fields=["x", 'y'],
                            empty='none')

    base = alt.Chart(df[(df.climate != "median")])
    color = alt.Color('climate',
                      scale=alt.Scale(scheme="dark2"),
                      title='Climate Model')

    # Band between the per-x minimum and maximum of each climate model.
    area = base.mark_area(opacity=0.3).encode(
        x=alt.X("x", scale=alt.Scale(domain=[0, df['x'].max()])),
        color=color,
        y=alt.Y(field="y",
                type="quantitative",
                axis=alt.Axis(format='%'),
                aggregate="min"),
        y2=alt.Y2(field="y", aggregate="max"),
        opacity=alt.condition(selection_climate, alt.value(0.3),
                              alt.value(0)),
    ).add_selection(selection_climate)

    lines = base.mark_line().encode(
        x=alt.X("x"),
        y=alt.Y("y", axis=alt.Axis(format='%')),
        detail=["climate", "impact"],
        color=color,
        opacity=alt.condition(selection_climate, alt.value(0.3),
                              alt.value(0)),
        size=alt.condition("datum.impact == 'median'", alt.value(5),
                           alt.value(1)),
    )

    points = base.mark_point().encode(
        x=alt.X("x",
                axis=alt.Axis(title=xlabel or '{} ({})'.format(
                    data.plot_label_x, data.plot_unit_x),
                              values=data.x)),
        y=alt.Y("y", axis=alt.Axis(title=data.plot_unit_y, format='%')),
        detail=["climate", "impact"],
        color=color,
        opacity=alt.condition(selection_climate, alt.value(0.3),
                              alt.value(0)),
        size=alt.value(12))

    # Two text layers at the hovered point: model name above, value below.
    text_model = points.mark_text(align='left', dx=-5, dy=-6).encode(
        text=alt.condition(nearest, "model", alt.value(' ')),
        opacity=alt.condition(selection_climate, alt.value(1),
                              alt.value(0)),
        color=alt.value("black")).add_selection(nearest)

    text_pct = points.mark_text(align='left', dx=-5, dy=6).encode(
        text=alt.condition(nearest, "y", alt.value(' '), format=".2p"),
        opacity=alt.condition(selection_climate, alt.value(1),
                              alt.value(0)),
        color=alt.value("black"))

    chart = (area + lines + points + text_model + text_pct).properties(
        # Honour an explicit title, as _lineplot_altair_time_advanced does.
        title=title or data.plot_title,
        width=800,
        autosize=alt.AutoSizeParams(contains="padding", type="fit-x"),
    )
    return configure_chart(chart).interactive()
def _lineplot_altair_temp(data,
                          x=None,
                          scenario=None,
                          climate=None,
                          impact=None,
                          shading=False,
                          title='',
                          xlabel='',
                          ylabel=''):
    """Plot the median line with a min/max band for one scenario.

    ``data.to_pandas().loc[scenario]`` is reduced column-wise to its
    minimum, its maximum and the ``('median', 'median')`` entry, each
    divided by 100 so the axis can be percent-formatted.

    Empty ``title`` / ``xlabel`` / ``ylabel`` fall back to
    ``data.plot_title``, ``"<plot_label_x> (<plot_unit_x>)"`` and
    ``data.plot_unit_y`` respectively.  The remaining keyword arguments are
    not used here.

    Returns the result of ``configure_chart(chart).interactive()``.
    """
    table = data.to_pandas().loc[scenario]
    summary = pd.DataFrame({
        'lower': table.min(axis=0) / 100,
        'upper': table.max(axis=0) / 100,
        'median': table.loc['median', 'median'] / 100,
        'climate': 'Median',
    }).reset_index()

    # Fall back to the labels carried by ``data`` when none were given.
    title = title or data.plot_title
    xlabel = xlabel or '{} ({})'.format(data.plot_label_x, data.plot_unit_x)
    ylabel = ylabel or data.plot_unit_y

    x_scale = alt.Scale(domain=[0, summary['x'].max()])
    x_enc = alt.X('x:Q',
                  title=xlabel,
                  scale=x_scale,
                  axis=alt.Axis(values=data.x))

    root = alt.Chart(summary)

    # This selection is not attached to any layer; the call is kept so the
    # function issues the same sequence of Altair calls as before.
    hover = alt.selection(type='single',
                          nearest=True,
                          on='mouseover',
                          empty='none')

    y_enc = alt.Y('median:Q', title=ylabel, axis=alt.Axis(format='%'))

    tint = 'orange'
    legend_color = alt.Color('climate',
                             title='Climate Model',
                             scale=alt.Scale(domain=['Median'],
                                             range=[tint]))

    band = root.mark_area(opacity=0.3, color=tint).encode(
        x=x_enc,
        y=alt.Y('lower:Q'),
        y2=alt.Y2('upper:Q'),
    )
    median_line = root.mark_line(color=tint).encode(x=x_enc, y=y_enc)
    markers = root.mark_point(size=60).encode(
        x=x_enc,
        y=y_enc,
        color=legend_color,
        tooltip=[
            alt.Tooltip('x:Q', title=xlabel),
            alt.Tooltip('median:Q', title=ylabel, format='.1%'),
        ],
    )

    layered = markers + median_line + band
    layered = layered.properties(
        title=title,
        width=800,
        autosize=alt.AutoSizeParams(contains="padding", type="fit-x"),
    )
    return configure_chart(layered).interactive()
def _lineplot_altair_time_advanced(data,
                                   x=None,
                                   scenario=None,
                                   climate=None,
                                   impact=None,
                                   shading=False,
                                   title='',
                                   xlabel='',
                                   ylabel=''):
    """Build the detailed time-series plot with one line per model run.

    All lines returned by ``data.filter_lines()`` are stacked into one
    table.  The ``x`` values are strings of the form ``"<start>-<end>"``;
    each is replaced by ``<end> - 10`` and rows at or beyond 2100 are
    dropped.  The historical rows at x == 1990 are duplicated into the
    ``rcp60`` and ``rcp26`` scenarios so the future lines connect to the
    historical one.

    Parameters
    ----------
    data : object
        Must provide ``filter_lines()``, ``plot_title`` and
        ``plot_unit_y``.
    title : str, optional
        Chart title.  Falls back to ``data.plot_title`` when empty.
    xlabel : str, optional
        X-axis title.  Falls back to ``'Time'`` when empty.
    x, scenario, climate, impact, shading, ylabel :
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    alt.LayerChart
        Result of ``configure_chart(chart).interactive()``.

    Notes
    -----
    Relies on the module-level names ``np``, ``scenario_map`` and
    ``configure_chart``.
    """
    import pandas as pd
    import altair as alt

    df = pd.concat([pd.DataFrame(l) for l in data.filter_lines()])
    # Divide by 100 so we can percent-format in the plot
    df.y = df.y / 100
    df["x_range"] = df.x
    df.x = df.x.apply(lambda span: int(span.split("-")[1]) - 10)
    df = df[df.x < 2100]

    # Fill in gap by duplicating historical values to future scenarios.
    # ``DataFrame.append`` no longer exists in pandas >= 2.0 and ``.at`` is
    # a scalar accessor, so build the copies with ``assign`` and stack
    # everything with a single ``pd.concat``.
    historical_1990 = df[(df.scenario == "historical") & (df.x == 1990)]
    if not historical_1990.empty:
        bridges = [
            historical_1990.assign(scenario=target)
            for target in ("rcp60", "rcp26")
        ]
        df = pd.concat([df] + bridges)

    df["model"] = df.climate + " / " + df.impact
    df = df.replace(scenario_map)

    axisX = alt.X('x:Q',
                  title=xlabel or 'Time',
                  scale=alt.Scale(domain=[1900, 2100]),
                  axis=alt.Axis(format="i",
                                values=np.arange(1900, 2100 + 1,
                                                 20).tolist()))

    # Legend click toggles whole scenarios; hover picks the nearest point.
    selection_climate = alt.selection_multi(fields=['scenario'],
                                            bind='legend')
    nearest = alt.selection(type='single',
                            nearest=True,
                            on='mouseover',
                            fields=["x", 'y'],
                            empty='none')

    base = alt.Chart(df[(df.climate != "median")])
    color = alt.Color('scenario',
                      title='Climate Scenario',
                      scale=alt.Scale(
                          domain=list(scenario_map.values()),
                          range=['#4674b9', '#80b946', 'orange']))

    # Vertical rule and captions separating the two periods.
    rule_data = pd.DataFrame({'line': [2005]})
    rule_text_data = pd.DataFrame([
        {
            "year": 1910,
            "text": "Historical Period"
        },
        {
            "year": 2015,
            "text": "Future Projections"
        },
    ])
    rule = alt.Chart(rule_data).mark_rule().encode(x='line:Q')
    rule_text = alt.Chart(rule_text_data).mark_text(
        align="left", dy=-130).encode(x="year", text="text")

    # Band between the per-x minimum and maximum of each scenario.
    area = base.mark_area(opacity=0.3).encode(
        x=axisX,
        color=color,
        y=alt.Y(field="y",
                type="quantitative",
                axis=alt.Axis(format='%'),
                aggregate="min"),
        y2=alt.Y2(field="y", aggregate="max"),
        opacity=alt.condition(selection_climate, alt.value(0.3),
                              alt.value(0)),
    ).add_selection(selection_climate)

    lines = base.mark_line().encode(
        x=axisX,
        y=alt.Y("y", axis=alt.Axis(format='%')),
        detail=["climate", "impact", "scenario"],
        color=color,
        opacity=alt.condition(selection_climate, alt.value(0.3),
                              alt.value(0)),
        size=alt.condition("datum.impact == 'median'", alt.value(5),
                           alt.value(1)))

    points = base.mark_point().encode(
        x=axisX,
        y=alt.Y("y", axis=alt.Axis(title=data.plot_unit_y, format='%')),
        detail=["climate", "impact", "scenario"],
        color=color,
        opacity=alt.condition(selection_climate, alt.value(0.3),
                              alt.value(0)),
        size=alt.value(12),
    )

    # Two text layers at the hovered point: model name above, value below.
    text_model = points.mark_text(align='left', dx=-5, dy=-6).encode(
        text=alt.condition(nearest, "model", alt.value(' ')),
        opacity=alt.condition(selection_climate, alt.value(1),
                              alt.value(0)),
        color=alt.value("black")).add_selection(nearest)

    text_pct = points.mark_text(align='left', dx=-5, dy=6).encode(
        text=alt.condition(nearest, "y", alt.value(' '), format=".2p"),
        opacity=alt.condition(selection_climate, alt.value(1),
                              alt.value(0)),
        color=alt.value("black"))

    chart = (area + rule + rule_text + lines + points + text_model +
             text_pct).properties(
                 title=title or data.plot_title,
                 width=800,
                 autosize=alt.AutoSizeParams(contains="padding",
                                             type="fit-x"),
             )
    return configure_chart(chart).interactive()
def gen_sample_plot(metadata):
    """Create the Vega-Lite specification of the sample plot with Altair.

    Parameters
    ----------
    metadata: pd.DataFrame
        Sample metadata: one row per sample (the index holds the sample
        IDs) and one column per metadata field. It is expected to have been
        matched with the BIOM table, with empty samples dropped, before
        being passed in.

    Returns
    -------
    sample_chart_json: dict
        The chart as a dict, with an extra "qurro_sample_metadata_fields"
        dataset listing the metadata field names in case-insensitive
        alphabetical order.
    """
    table = metadata.copy()

    # The first metadata field drives both the x-axis and the color.
    first_field = table.columns[0]

    # No log-ratio is selected initially, so every balance is None; such
    # rows get filtered out and the scatterplot starts out empty.
    table["qurro_balance"] = None

    # Turn the index into a leading "Sample ID" column. (check_column_names()
    # is relied upon to guarantee no metadata field already has that name.)
    table = table.rename_axis("Sample ID", axis="index").reset_index()

    x_enc = alt.X(
        first_field,
        type="nominal",
        axis=alt.Axis(labelAngle=-45),
    )
    y_enc = alt.Y(
        "qurro_balance:Q",
        title="Current Log-Ratio",
        type="quantitative",
    )
    color_enc = alt.Color(first_field, type="nominal")

    chart = alt.Chart(
        table,
        title="Samples",
        background="#FFFFFF",
        autosize=alt.AutoSizeParams(resize=True),
    )
    chart = chart.mark_circle().encode(
        x_enc,
        y_enc,
        color=color_enc,
        tooltip=["Sample ID:N", "qurro_balance:Q"],
    )
    chart = chart.configure_range(
        ramp=alt.SchemeConfig(scheme="blues"),
        category=alt.SchemeConfig(scheme="tableau10"),
    )
    chart = chart.configure_axis(labelBound=True).interactive()

    spec = chart.to_dict()
    # Swap the bare "circle" mark for an explicit mark definition so that
    # attributes can be attached to it later (e.g. for the boxplot mark).
    # Done on the dict because alt.MarkDef could not be made to work in the
    # chart definition itself.
    spec["mark"] = {"type": "circle"}

    fields_key = "qurro_sample_metadata_fields"
    check_json_dataset_names(spec, fields_key)

    # Case-insensitive ordering of the field names, used to populate the
    # x-axis / color selectors. (A case-sensitive sort would place
    # "Sample ID" before "age".) qurro_balance is dropped since it is
    # internal and already shown on the y-axis.
    field_names = sorted(table.columns, key=str.lower)
    field_names.remove("qurro_balance")
    spec["datasets"][fields_key] = field_names
    return spec
def gen_rank_plot(V, ranking_ids, feature_metadata_cols):
    """Create the Vega-Lite specification of the rank plot with Altair.

    Parameters
    ----------
    V: pd.DataFrame
        Feature rankings (plus feature metadata, when available): one row
        per feature, one column per ranking or metadata field. It is
        expected to have been matched with the BIOM table, filtered and
        stripped of empty features before being passed in.

    ranking_ids: pd.Index
        Names of the columns of V that hold feature rankings.

    feature_metadata_cols: pd.Index or list
        Names of the columns of V that hold feature metadata (may be
        empty).

    Returns
    -------
    rank_chart_json: dict
        The chart as a dict, with two extra datasets,
        "qurro_rank_ordering" and "qurro_feature_metadata_ordering",
        listing the ranking columns and the feature metadata columns.
    """
    table = V.copy()

    # Rank values may still be stored as generic objects; convert them so
    # that sorting is numeric rather than lexicographic.
    for rank_col in ranking_ids:
        table[rank_col] = pd.to_numeric(table[rank_col])

    # The plot initially shows the first ranking.
    initial_rank = ranking_ids[0]

    # Every feature starts out unclassified; this is updated once features
    # are picked for the numerator and/or denominator of a log-ratio.
    table["qurro_classification"] = "None"

    # Expose the index as a "Feature ID" column (nicer than "index").
    table = table.rename_axis("Feature ID", axis="index").reset_index()

    # type="ordinal" is needed to make bars adjacent;
    # see https://stackoverflow.com/a/55544817/10730311.
    x_enc = alt.X(
        "qurro_x",
        title="Feature Rankings",
        type="ordinal",
        scale=alt.Scale(paddingOuter=1, paddingInner=0, rangeStep=1),
        axis=alt.Axis(ticks=False, labelAngle=0),
    )
    y_enc = alt.Y(initial_rank, type="quantitative")
    color_enc = alt.Color(
        "qurro_classification",
        title="Log-Ratio Classification",
        scale=alt.Scale(
            domain=["None", "Numerator", "Denominator", "Both"],
            range=["#e0e0e0", "#f00", "#00f", "#949"],
        ),
    )

    tooltips = [
        alt.Tooltip(
            field="qurro_x",
            title="Current Ranking",
            type="quantitative",
        ),
        alt.Tooltip(
            field="qurro_classification",
            title="Log-Ratio Classification",
            type="nominal",
        ),
        "Feature ID",
    ]
    tooltips.extend(feature_metadata_cols)
    tooltips.extend(ranking_ids)

    chart = alt.Chart(
        table,
        title="Features",
        background="#FFFFFF",
        autosize=alt.AutoSizeParams(resize=True),
    ).mark_bar()
    # A plain dict is used for the window definition because "as" is a
    # Python keyword and cannot be passed as a keyword argument.
    chart = chart.transform_window(
        sort=[alt.SortField(field=initial_rank, order="ascending")],
        window=[{"op": "row_number", "as": "qurro_x"}],
    )
    chart = chart.encode(
        x=x_enc,
        y=y_enc,
        color=color_enc,
        tooltip=tooltips,
    )
    # A light grid color keeps "None"-classified bars distinguishable from
    # the grid lines.
    chart = chart.configure_axis(
        gridColor="#f2f2f2",
        labelBound=True,
    ).interactive()

    spec = chart.to_dict()
    rank_key = "qurro_rank_ordering"
    fm_key = "qurro_feature_metadata_ordering"
    check_json_dataset_names(spec, rank_key, fm_key)

    # table.columns is not used here: besides the rankings it also contains
    # "Feature ID", "qurro_classification" and any feature metadata.
    spec["datasets"][rank_key] = list(ranking_ids)
    spec["datasets"][fm_key] = list(feature_metadata_cols)
    return spec
def missingDates(data, freq="D", format='%Y-%m-%d', returnType="viz"):
    """Check a series of dates for gaps.

    The dates are parsed with ``pd.to_datetime``.  A complete range is
    generated between the earliest and latest day found, at frequency
    ``freq``, and each generated date is flagged as present in or absent
    from the input.

    Parameters
    ----------
    data : pd.Series
        Series containing dates.
    freq : str, default 'D'
        Expected frequency, passed to ``pd.date_range``.  Valid options:

        B: business day; C: custom business day; D: calendar day;
        W: weekly; M: month end; SM: semi-month end (15th and end of
        month); BM: business month end; CBM: custom business month end;
        MS: month start; SMS: semi-month start (1st and 15th);
        BMS: business month start; CBMS: custom business month start;
        Q: quarter end; BQ: business quarter end; QS: quarter start;
        BQS: business quarter start; A, Y: year end;
        BA, BY: business year end; AS, YS: year start;
        BAS, BYS: business year start; BH: business hour; H: hourly;
        T, min: minutely; S: secondly; L, ms: milliseconds;
        U, us: microseconds; N: nanoseconds
    format : str, default '%Y-%m-%d'
        Accepted for backward compatibility.  NOTE: it is currently not
        applied when parsing ``data``; parsing relies on the inference done
        by ``pd.to_datetime``.
    returnType : {'viz', 'missing', 'all'}, default 'viz'
        missing: return only the rows for missing dates
        all: return the rows for both missing and present dates
        viz: return a visualisation

    Returns
    -------
    pd.DataFrame or alt.Chart
        For 'missing' and 'all', a DataFrame with a ``date`` column
        (``'%Y-%m-%d'`` strings) and a boolean ``exists`` column.  For
        'viz', an interactive Altair bar chart.

    Raises
    ------
    ValueError
        If ``returnType`` is not one of the accepted values, or if ``data``
        contains no valid date.
    TypeError
        If ``data`` is not a ``pd.Series``.
    """
    # Validate up front: an unknown returnType used to surface only at the
    # very end as an UnboundLocalError.
    if returnType not in ("viz", "missing", "all"):
        raise ValueError(
            'returnType must be "viz", "missing" or "all", got {!r}'.format(
                returnType))
    if not isinstance(data, pd.Series):
        raise TypeError("data must be a pandas Series, got {}".format(
            type(data).__name__))

    datesToCheck = pd.to_datetime(list(data))
    firstDate = datesToCheck.min()
    lastDate = datesToCheck.max()
    if pd.isna(firstDate):
        raise ValueError("data contains no valid dates")

    # Going through '%Y-%m-%d' strings truncates both bounds to whole days.
    computedRange = pd.date_range(firstDate.strftime("%Y-%m-%d"),
                                  lastDate.strftime("%Y-%m-%d"),
                                  freq=freq)

    exists = computedRange.isin(datesToCheck)
    allChecks = pd.DataFrame({
        "date": list(computedRange.strftime("%Y-%m-%d")),
        "exists": exists,
    })

    if returnType == "missing":
        return allChecks[~allChecks["exists"]]
    if returnType == "all":
        return allChecks

    # returnType == "viz"
    total = len(exists)
    missing = int((~exists).sum())
    # ``total`` is 0 when ``freq`` yields no date inside the range.
    percentMissing = int((missing / total) * 100) if total else 0
    calculatedTitle = "Total Dates: {}, Missing Dates: {}, ({}%)".format(
        total, missing, percentMissing)

    scale = alt.Scale(domain=['true', 'false'],
                      range=['#B8E986', '#F15545'])
    xRange = len(computedRange)
    # Spread the bars over the 800 px chart width.
    barSize = 800 / max(xRange, 1)
    padding = (barSize / 2) + 1

    return alt.Chart(
        allChecks,
        title=alt.TitleParams(calculatedTitle,
                              anchor="start",
                              offset=20,
                              orient="top"),
        width=800,
        height=400,
        autosize=alt.AutoSizeParams(contains="content",
                                    resize=True,
                                    type="fit"),
    ).mark_bar(size=barSize).encode(
        x=alt.X("date",
                axis=alt.Axis(tickCount=xRange),
                title="Date",
                type="temporal",
                scale=alt.Scale(padding=padding)),
        y="count()",
        color=alt.Color("exists", scale=scale),
        tooltip=[
            alt.Tooltip("date", format="%Y-%m-%d", type="temporal"),
            "exists", "count()"
        ],
    ).interactive(bind_y=False)