Пример #1
0
def prepare_weighted_spans(targets, preserve_density=None):
    # type: (List[TargetExplanation], bool) -> List[List[PreparedWeightedSpans]]
    """ Return weighted spans prepared for rendering.

    Calculate a separate weight range for each different weighted
    span (for each different index): each target has the same number
    of weighted spans.

    :param targets: target explanations. A target whose ``weighted_spans``
        is falsy is allowed; it contributes nothing to the weight ranges.
    :param preserve_density: passed through unchanged to
        ``get_char_weights``.
    :return: one entry per target, in input order: a list of
        ``PreparedWeightedSpans`` (one per document weighted span), or
        ``None`` for a target without weighted spans.
    """
    # One list of per-span char weights per target; None marks a target
    # that has no weighted spans at all.
    targets_char_weights = [
        [get_char_weights(ws, preserve_density=preserve_density)
         for ws in t.weighted_spans.docs_weighted_spans]
        if t.weighted_spans else None
        for t in targets]  # type: List[List[np.ndarray]]
    max_idx = max_or_0(len(ch_w or []) for ch_w in targets_char_weights)
    # Drop the None entries *before* indexing. A filter written after
    # ``for x in char_weights[idx]`` is only evaluated once that subscript
    # has already run, so ``None[idx]`` would raise TypeError.
    present_char_weights = [
        ch_w for ch_w in targets_char_weights if ch_w is not None]
    # The weight range of span ``idx`` is the largest absolute char weight
    # at that index across all targets that have weighted spans.
    spans_weight_ranges = [
        max_or_0(
            abs(x) for char_weights in present_char_weights
            for x in char_weights[idx])
        for idx in range(max_idx)]
    return [
        [PreparedWeightedSpans(ws, char_weights, weight_range)
         for ws, char_weights, weight_range in zip(
            t.weighted_spans.docs_weighted_spans,
            t_char_weights,
            spans_weight_ranges)]
        if t_char_weights is not None else None
        for t, t_char_weights in zip(targets, targets_char_weights)]
Пример #2
0
def get_weight_range(weights):
    # type: (FeatureWeights) -> float
    """ Largest absolute weight found among positive and negative features.

    A falsy ``weights.pos`` or ``weights.neg`` (e.g. ``None`` or an empty
    list) is treated as having no features.
    """
    def _magnitudes():
        # Walk the positive group first, then the negative one.
        for group in (weights.pos, weights.neg):
            for feature_weight in group or []:
                yield abs(feature_weight.weight)

    return max_or_0(_magnitudes())
Пример #3
0
def get_eli5_weights(model: BaseModel, docs: List):
    """ Build a dict of eli5 feature weights, each entry tagged with a color.

    ``model.explain_predictions(docs)`` is tried first; if the model raises
    ``NotImplementedError``, ``model.explain_weights()`` is used instead.
    The explanation is logged as text and then converted:

    * with targets: the first target's ``pos`` and ``neg`` lists are
      rewritten in place as dicts with ``feature``, ``weight`` and
      ``hsl_color`` keys, ``neg`` is reversed, and the result of
      ``format_as_dict`` is returned;
    * with feature importances only: every importance is returned under
      ``pos`` (``neg`` stays empty), colored relative to the largest
      absolute importance;
    * otherwise an empty dict is returned.
    """
    logging.info('explaining weights')
    try:
        expl = model.explain_predictions(docs)
    except NotImplementedError:
        expl = model.explain_weights()
    weights_text = format_as_text(expl, show=fields.WEIGHTS)
    logging.info('model weights:\n{}'.format(weights_text))

    if expl.targets:
        target_weights = expl.targets[0].feature_weights
        scale = get_weight_range(target_weights)
        for group in (target_weights.pos, target_weights.neg):
            colored = []
            for item in group:
                colored.append(dict(
                    feature=item.feature,
                    weight=item.weight,
                    hsl_color=format_hsl(
                        weight_color_hsl(item.weight, scale)),
                ))
            # Slice assignment keeps the same list object on the explanation.
            group[:] = colored
        target_weights.neg.reverse()
        return format_as_dict(target_weights)

    feature_importances = expl.feature_importances
    if not feature_importances:
        return {}

    importances = feature_importances.importances
    scale = max_or_0(abs(item.weight) for item in importances)
    positive = []
    for item in importances:
        positive.append(dict(
            feature=item.feature,
            weight=float(item.weight),
            hsl_color=format_hsl(weight_color_hsl(item.weight, scale)),
        ))
    return dict(
        pos=positive,
        neg=[],
        pos_remaining=int(feature_importances.remaining),
        neg_remaining=0,
    )
Пример #4
0
def format_as_html(explanation,
                   include_styles=True,
                   force_weights=True,
                   show=fields.ALL,
                   preserve_density=None,
                   highlight_spaces=None,
                   horizontal_layout=True,
                   show_feature_values=False):
    """ Render an explanation with the ``explain.html`` template.

    Most styles are emitted inline; a few live in a separate <style> tag.
    Pass ``include_styles=False`` to leave that tag out and, if wanted,
    render it separately with ``format_html_styles``.

    ``force_weights=False`` suppresses the weights table for predictions
    whose feature weights can be shown highlighted in the document.

    ``highlight_spaces``: None (default) highlights spaces in feature names
    only when a feature starts or ends with a space; True always highlights;
    False never does.

    ``horizontal_layout``: when True (default), multiclass classifier
    weights are laid out horizontally. It is switched off when there is
    exactly one target.

    ``show_feature_values``: when True, feature values are shown if present.
    Default is False.
    """
    template = template_env.get_template('explain.html')
    if highlight_spaces is None:
        highlight_spaces = should_highlight_spaces(explanation)

    targets = explanation.targets or []
    horizontal_layout = False if len(targets) == 1 else horizontal_layout

    explaining_prediction = has_any_values_for_weights(explanation)
    show_feature_values = show_feature_values and explaining_prediction

    rendered_weighted_spans = render_targets_weighted_spans(
        targets, preserve_density)
    weighted_spans_others = []
    for target in targets:
        spans = target.weighted_spans
        weighted_spans_others.append(spans.other if spans else None)

    any_weighted_spans = any(t.weighted_spans for t in targets)

    if explanation.feature_importances:
        feat_imp_weight_range = max_or_0(
            abs(fw.weight)
            for fw in explanation.feature_importances.importances)
    else:
        feat_imp_weight_range = 0
    target_weight_range = max_or_0(
        get_weight_range(t.feature_weights) for t in targets)
    other_weight_range = max_or_0(
        get_weight_range(other) for other in weighted_spans_others if other)
    targets_with_weighted_spans = list(
        zip(targets, rendered_weighted_spans, weighted_spans_others))

    context = dict(
        include_styles=include_styles,
        force_weights=force_weights,
        target_table_styles=(
            'border-collapse: collapse; border: none; margin-top: 0em; '
            'table-layout: auto;'),
        tr_styles='border: none;',
        # Weight (th and td)
        td1_styles='padding: 0 1em 0 0.5em; text-align: right; border: none;',
        # N more positive/negative
        tdm_styles=(
            'padding: 0 0.5em 0 0.5em; text-align: center; border: none; '
            'white-space: nowrap;'),
        # Feature (th and td)
        td2_styles='padding: 0 0.5em 0 0.5em; text-align: left; border: none;',
        # Value (th and td)
        td3_styles='padding: 0 0.5em 0 1em; text-align: right; border: none;',
        horizontal_layout_table_styles=(
            'border-collapse: collapse; border: none; margin-bottom: 1.5em;'),
        horizontal_layout_td_styles=(
            'padding: 0px; border: 1px solid black; vertical-align: top;'),
        horizontal_layout_header_styles=(
            'padding: 0.5em; border: 1px solid black; text-align: center;'),
        show=show,
        expl=explanation,
        hl_spaces=highlight_spaces,
        horizontal_layout=horizontal_layout,
        any_weighted_spans=any_weighted_spans,
        feat_imp_weight_range=feat_imp_weight_range,
        target_weight_range=target_weight_range,
        other_weight_range=other_weight_range,
        targets_with_weighted_spans=targets_with_weighted_spans,
        show_feature_values=show_feature_values,
        # The extra column holds feature values when they are shown.
        weights_table_span=3 if show_feature_values else 2,
        explaining_prediction=explaining_prediction,
        weight_help=html_escape(WEIGHT_HELP),
        contribution_help=html_escape(CONTRIBUTION_HELP),
    )
    return template.render(**context)