def source_vs_hour_chart(
    base: alt.Chart,
    sensor_unit: str,
    max_absolute_error: float,
    faceted: bool = False,
) -> Union[alt.Chart, alt.FacetChart]:
    """Build a heatmap of mean absolute error per source and hour of day.

    The chart is derived from ``base`` with rect marks. A join-aggregate adds
    the mean of ``mae`` and of ``reference_value`` per
    (``event_start``, ``source``) group, and the mean ``mae`` is exposed as
    the ``accuracy`` field that drives both the colour and the tooltip.

    :param base: chart to derive the heatmap from.
    :param sensor_unit: unit name interpolated into the tooltip title.
    :param max_absolute_error: first bound of the colour scale domain; the
        domain runs from this value down to 0.
    :param faceted: when true, facet the chart into one row per source;
        otherwise put the source on the y channel of a single chart.
    :returns: the chart, with a centred title set.
    """
    # x channel: event_start with the "hours" time unit, on a fixed 0..23 domain.
    hour_axis = alt.Axis(domain=False, ticks=False, labelAngle=0)
    hour_scale = alt.Scale(domain=list(range(24)))
    hour_of_day = alt.X(
        "event_start:O",
        timeUnit="hours",
        axis=hour_axis,
        scale=hour_scale,
        title="Hour of day",  # "UTC hour of day"
    )

    # Colour by accuracy where the time brush applies; idle colour elsewhere.
    error_scale = alt.Scale(domain=(max_absolute_error, 0), scheme="redyellowgreen")
    error_color = alt.Color("accuracy:Q", scale=error_scale, title="Error")
    brushed_color = alt.condition(
        selectors.time_selection_brush,
        error_color,
        alt.value(selectors.idle_color),
    )

    hour_tooltip = alt.Tooltip("event_start:T", timeUnit="hours", title="Hour of day")
    error_tooltip = alt.Tooltip(
        "accuracy:Q",
        title="Mean absolute error (%s)" % sensor_unit,
        format=".2f",
    )

    aggregated = base.mark_rect().transform_joinaggregate(
        on_the_fly_mae="mean(mae)",
        on_the_fly_reference="mean(reference_value)",
        groupby=["event_start", "source"],
    )
    with_accuracy = aggregated.transform_calculate(accuracy=alt.datum.on_the_fly_mae)
    heatmap = with_accuracy.encode(
        x=hour_of_day,
        color=brushed_color,
        tooltip=[hour_tooltip, error_tooltip],
    )

    if not faceted:
        source_axis = alt.Axis(
            domain=False, ticks=False, labelAngle=0, labelPadding=5
        )
        heatmap = heatmap.encode(y=alt.Y("source:O", axis=source_axis, title=None))
    else:
        source_row = alt.Row("source:O", title=None, header=alt.Header(labelAngle=0))
        heatmap = heatmap.facet(row=source_row)

    chart_title = alt.TitleParams(
        "Model performance given a time of day", anchor="middle"
    )
    return heatmap.properties(title=chart_title)
def confusion_matrix(df=None, truth=None, pred=None, mapping=None):
    """Build a confusion-matrix heatmap with the cell counts drawn on top.

    The function can be called in two ways:

    * with ``df`` given, ``truth`` and ``pred`` are the names of the columns
      holding the true and predicted labels;
    * with ``df`` omitted, ``truth`` and ``pred`` are the label values
      themselves and a two-column frame (``'truth'``, ``'pred'``) is built
      from them.

    As a side effect, a scikit-learn classification report is printed when it
    can be produced; otherwise ``'Skipping Report'`` is logged.

    :param df: frame holding the label columns, or ``None``.
    :param truth: column name (when ``df`` is given) or true label values.
    :param pred: column name (when ``df`` is given) or predicted label values.
    :param mapping: optional dict translating raw labels to display labels;
        every label present must be a key (a missing key raises ``KeyError``).
    :returns: layered chart: the heatmap plus the count text layer.
    """
    if df is None:
        df = pd.DataFrame({'truth': truth, 'pred': pred})
        truth = 'truth'
        pred = 'pred'

    threshold = len(df)

    if mapping:
        assert isinstance(mapping, dict), 'mapping should be a dictionary'
        # Work on a copy so the caller's frame is not relabelled in place;
        # otherwise a second call with the same frame would look up labels
        # that have already been mapped.
        df = df.copy()
        df[truth] = df[truth].map(lambda x: mapping[x])
        df[pred] = df[pred].map(lambda x: mapping[x])

    # Larger canvas once there are more than four distinct true labels.
    sz = 450 if len(df[truth].unique()) > 4 else 250

    # Shared base: one row per (truth, pred) pair with its count.
    # rev_num_vals is computed but not used by any encoding in this function.
    base = Chart(df, height=sz, width=sz).transform_aggregate(
        num_vals='count()',
        groupby=[truth, pred],
    ).transform_calculate(
        rev_num_vals='-(datum.num_vals) + max(datum.num_vals)',
    ).encode(
        alt.Y(f'{truth}:O', scale=alt.Scale(paddingInner=0)),
        alt.X(f'{pred}:O', scale=alt.Scale(paddingInner=0)),
    )

    hm = base.mark_rect().encode(
        color=alt.Color(
            'num_vals:Q',
            scale=alt.Scale(scheme="lightorange"),
            legend=None,
        )
    )

    # Both branches of the condition are black, so the text is always black;
    # the condition is kept so that the emitted chart spec stays the same.
    tx = base.mark_text(baseline='middle').encode(
        text='num_vals:Q',
        color=alt.condition(
            alt.datum.num_vals > threshold,
            alt.value('black'),
            alt.value('black'),
        ),
    )

    # The report is best-effort: scikit-learn may be missing or may reject
    # the labels. Catch Exception rather than everything so that
    # KeyboardInterrupt / SystemExit still propagate.
    try:
        from sklearn.metrics import classification_report
        print(classification_report(df[truth], df[pred]))
    except Exception:
        logger.info('Skipping Report')

    return hm + tx