Exemplo n.º 1
0
def source_vs_hour_chart(
    base: alt.Chart, sensor_unit: str, max_absolute_error: float, faceted: bool = False
) -> Union[alt.Chart, alt.FacetChart]:
    """Build a heatmap of mean absolute error per source and hour of day.

    The ``mae`` and ``reference_value`` fields of ``base`` are averaged per
    (``event_start``, ``source``) group; the averaged ``mae`` is exposed as the
    ``accuracy`` field that drives both the cell colour and the tooltip.

    :param base: chart to derive the heatmap from
    :param sensor_unit: unit label shown in the tooltip title
    :param max_absolute_error: upper end of the colour scale domain (the
        domain runs from this value down to 0)
    :param faceted: if True, give each source its own facet row; otherwise
        put the sources on the y axis of a single chart
    :return: the heatmap, titled "Model performance given a time of day"
    """
    # Encodings are prepared up front so the chart pipeline below stays short.
    hour_of_day = alt.X(
        "event_start:O",
        timeUnit="hours",
        axis=alt.Axis(domain=False, ticks=False, labelAngle=0),
        scale=alt.Scale(domain=list(range(24))),
        title="Hour of day",  # "UTC hour of day"
    )
    error_scale = alt.Scale(domain=(max_absolute_error, 0), scheme="redyellowgreen")
    # Cells inside the time brush are coloured by error; the rest get the idle colour.
    cell_color = alt.condition(
        selectors.time_selection_brush,
        alt.Color("accuracy:Q", scale=error_scale, title="Error"),
        alt.value(selectors.idle_color),
    )
    hover_info = [
        alt.Tooltip("event_start:T", timeUnit="hours", title="Hour of day"),
        alt.Tooltip(
            "accuracy:Q",
            title="Mean absolute error (%s)" % sensor_unit,
            format=".2f",
        ),
    ]

    aggregated = base.mark_rect().transform_joinaggregate(
        on_the_fly_mae="mean(mae)",
        on_the_fly_reference="mean(reference_value)",
        groupby=["event_start", "source"],
    )
    heatmap = aggregated.transform_calculate(
        accuracy=alt.datum.on_the_fly_mae
    ).encode(x=hour_of_day, color=cell_color, tooltip=hover_info)

    chart_title = alt.TitleParams(
        "Model performance given a time of day", anchor="middle"
    )
    if not faceted:
        source_axis = alt.Y(
            "source:O",
            axis=alt.Axis(domain=False, ticks=False, labelAngle=0, labelPadding=5),
            title=None,
        )
        return heatmap.encode(y=source_axis).properties(title=chart_title)

    source_rows = alt.Row("source:O", title=None, header=alt.Header(labelAngle=0))
    return heatmap.facet(row=source_rows).properties(title=chart_title)
Exemplo n.º 2
0
def confusion_matrix(df=None, truth=None, pred=None, mapping=None):
    """Draw a confusion matrix as a heatmap with the count written in each cell.

    Call it either with a DataFrame plus the names of its truth and
    prediction columns, or with ``df=None`` and the label sequences passed
    directly as ``truth`` and ``pred``.

    Args:
        df: DataFrame holding the labels, or None to build one from
            ``truth`` and ``pred``. It is never modified.
        truth: column name of the true labels in ``df``; the true labels
            themselves when ``df`` is None.
        pred: column name of the predicted labels in ``df``; the predicted
            labels themselves when ``df`` is None.
        mapping: optional dict used to relabel both columns. Every label
            must be a key of the dict, otherwise ``KeyError`` is raised.

    Returns:
        The layered chart ``heatmap + text``.

    As a best-effort side effect, scikit-learn's classification report is
    printed; if that fails for any reason the failure is only logged.
    """
    if df is None:
        df = pd.DataFrame({'truth': truth, 'pred': pred})
        truth = 'truth'
        pred = 'pred'
    threshold = len(df)
    if mapping:
        assert isinstance(mapping, dict), 'mapping should be a dictionary'
        # Relabel a copy: writing into the caller's DataFrame would change
        # their data and make a second call with the same mapping fail.
        df = df.copy()
        df[truth] = df[truth].map(lambda x: mapping[x])
        df[pred] = df[pred].map(lambda x: mapping[x])

    # Use a larger canvas once there are more than four distinct true labels.
    sz = 450 if len(df[truth].unique()) > 4 else 250
    base = Chart(df, height=sz, width=sz).transform_aggregate(
        num_vals='count()', groupby=[truth, pred]).transform_calculate(
            rev_num_vals='-(datum.num_vals) + max(datum.num_vals)', ).encode(
                alt.Y(f'{truth}:O', scale=alt.Scale(paddingInner=0)),
                alt.X(f'{pred}:O', scale=alt.Scale(paddingInner=0)),
            )

    hm = base.mark_rect().encode(color=alt.Color(
        'num_vals:Q', scale=alt.Scale(scheme="lightorange"), legend=None))

    # NOTE(review): both branches of the condition are 'black', so the text
    # colour is effectively constant; kept to leave the chart spec unchanged.
    tx = base.mark_text(baseline='middle').encode(
        text='num_vals:Q',
        color=alt.condition(alt.datum.num_vals > threshold, alt.value('black'),
                            alt.value('black')))

    try:
        from sklearn.metrics import classification_report
        print(classification_report(df[truth], df[pred]))
    except Exception:
        # The report is optional (sklearn may be missing or reject the
        # labels), but KeyboardInterrupt/SystemExit must still propagate.
        logger.info('Skipping Report')
    return hm + tx