示例#1
0
def explain_pred_contrib(id,
                         clf,
                         X,
                         features,
                         cats=None,
                         waterfall={
                             'rotation_value': 60,
                             'threshold': None
                         }):
    """Explain a single row's prediction with eli5 and an optional waterfall chart.

    Prints the positive-class probability for the row labelled ``id`` and the
    model type, builds the eli5 explanation, optionally rewrites category ids
    in the rendered explanation, and optionally draws a waterfall chart of the
    per-feature weights.

    Args:
        id: Index label of the row of ``X`` to explain.
        clf: Fitted classifier exposing ``predict_proba``.
        X: DataFrame of model inputs, indexed so that ``X.loc[id]`` selects
            the row to explain.
        features: Feature names forwarded to eli5 as ``feature_names``.
        cats: Optional argument forwarded to ``id2class(exp, cats)``; each
            ``key -> value`` pair of the returned mapping is substituted in
            the rendered explanation text. Skipped when ``None``.
        waterfall: Chart options (``'rotation_value'``, ``'threshold'``), or
            ``None`` to skip the chart. Missing keys fall back to ``60`` and
            ``None`` respectively. The default dict is only read, never
            mutated.

    Returns:
        The object returned by ``eli5.show_prediction`` (its ``data``
        attribute is rewritten when ``cats`` is given).
    """
    # Some estimators reject DataFrames; retry with the bare ndarray.
    row_frame = X.loc[X.index == id]
    try:
        p = clf.predict_proba(row_frame)[:, 1]
    except Exception:
        p = clf.predict_proba(row_frame.values)[:, 1]
    print(
        f'Prediction explanation for ID: {id}; Probability of event (y=1): {np.round(p[0], 3)}\nModel used: {type(clf)}'
    )

    # Same DataFrame-vs-ndarray fallback for eli5.
    row = X.loc[id]
    try:
        df = eli5.show_prediction(clf,
                                  row,
                                  show_feature_values=True,
                                  feature_names=features)
        exp = eli5.explain_prediction_df(clf, row, feature_names=features)
    except Exception:
        df = eli5.show_prediction(clf,
                                  row.values,
                                  show_feature_values=True,
                                  feature_names=features)
        exp = eli5.explain_prediction_df(clf,
                                         row.values,
                                         feature_names=features)

    if cats is not None:
        # Swap each id for its class label in the rendered explanation.
        for k, v in id2class(exp, cats).items():
            df.data = df.data.replace(k, v)

    if waterfall is not None:
        # .get() lets callers pass only the option they want to override.
        waterfall_chart.plot(exp.feature,
                             exp.weight,
                             rotation_value=waterfall.get('rotation_value', 60),
                             net_label="Final Score/Proba",
                             other_label="Minor Features",
                             formatting="{:,.2f}",
                             threshold=waterfall.get('threshold'),
                             Title='Waterfall of features contributions')
    return df
示例#2
0
def ff_display(df, index_cols, waterfall_cols=None, monthly=False):
    """Display factor weights, their contributions, and one waterfall per column.

    Args:
        df: Input passed unchanged to ``ff_weights`` and ``ff_importances``.
        index_cols: Columns passed to ``ff_weights``; also the columns charted
            when ``waterfall_cols`` is not given.
        waterfall_cols: Columns to draw waterfall charts for. Defaults to
            ``index_cols``.
        monthly: When true, contributions are multiplied by 12.
    """
    display(HTML("<b>Fama French factors:</b>"))
    weights = ff_weights(df, index_cols)
    display(weights)

    print("")
    display(HTML("<b>Contributions to return:</b>"))
    scale = 12. if monthly else 1.
    contributions = ff_importances(df, weights) * scale
    display(contributions)

    columns_to_plot = index_cols if waterfall_cols is None else waterfall_cols
    for col in columns_to_plot:
        # Contributions are charted as percentages.
        waterfall_chart.plot(contributions.index,
                             contributions[col] * 100,
                             formatting="{:,.2f}%",
                             Title=col)
示例#3
0
def ShapWaterFall(Model, X_tng, X_sc, ref1, ref2, num_feature):
    """Plot a waterfall of SHAP contribution differences between two clients.

    The two rows of ``X_sc`` whose ``Reference`` equals ``ref1`` / ``ref2`` are
    ordered from lower to higher predicted probability. The chart starts at
    the lower client's probability, steps through the per-feature differences
    in SHAP value (the ``num_feature`` largest shown individually, the rest
    pooled into one "Other" bar), and labels the net bar with the higher
    client's reference.

    Args:
        Model: Fitted model exposing ``predict_proba`` and accepted by
            ``shap.TreeExplainer``.
        X_tng: Training data (DataFrame or array-like); 100 rows are sampled
            from it as SHAP background data.
        X_sc: Scoring data (DataFrame or array-like) with a ``Reference``
            column identifying each client.
        ref1: ``Reference`` value of the first client.
        ref2: ``Reference`` value of the second client.
        num_feature: Number of features to show as individual bars.

    Returns:
        None. The chart is shown via ``plot.show()``.
    """
    import pandas as pd
    import numpy as np
    import shap
    import matplotlib.pyplot as plt
    import waterfall_chart

    clients_to_show = [ref1, ref2]

    # Accept either DataFrames or anything pd.DataFrame can wrap.
    X_v = X_sc if isinstance(X_sc, pd.DataFrame) else pd.DataFrame(X_sc)
    X_t = X_tng if isinstance(X_tng, pd.DataFrame) else pd.DataFrame(X_tng)

    explainer = shap.TreeExplainer(Model, shap.sample(X_t, 100))

    # `columns=` rather than a positional axis, which pandas 2.0 no longer accepts.
    data_for_prediction1 = X_v[X_v.Reference == ref1].drop(columns='Reference')
    data_for_prediction2 = X_v[X_v.Reference == ref2].drop(columns='Reference')

    # Order the pair from lower to higher propensity.
    proba1 = Model.predict_proba(data_for_prediction1)[:, 1]
    proba2 = Model.predict_proba(data_for_prediction2)[:, 1]
    if proba1 <= proba2:
        frames = [data_for_prediction1, data_for_prediction2]
    else:
        frames = [data_for_prediction2, data_for_prediction1]
        clients_to_show = [ref2, ref1]

    # Per-feature SHAP values for both clients, one row each.
    data_for_prediction = pd.concat(frames)
    feature_names = data_for_prediction.columns.values
    shap_values = explainer.shap_values(data_for_prediction)
    # NOTE(review): indexing [1] assumes shap_values is a per-class list with
    # the positive class at index 1 -- confirm for the installed shap version.
    Feat_contrib = pd.DataFrame(list(map(np.ravel, shap_values[1])),
                                columns=feature_names)
    counter1 = len(Feat_contrib.columns)
    Feat_contrib['base_line_diff'] = Feat_contrib.sum(axis=1)
    Feat_contrib['prediction'] = Model.predict_proba(data_for_prediction)[:, 1]
    Feat_contrib[
        'baseline'] = Feat_contrib.prediction - Feat_contrib.base_line_diff

    # Second client minus first; keep only the real feature columns
    # (the first counter1 entries), largest positive difference first.
    row_diff = Feat_contrib.diff().iloc[1, :]
    diff_df = pd.DataFrame({
        'features': row_diff.index,
        'contrib': row_diff.values
    })[:counter1].sort_values(by='contrib',
                              ascending=False).reset_index(drop=True)

    # Bars: starting probability, pooled remainder, then the top features.
    plt.rcParams.update({'figure.figsize': (16, 12), 'figure.dpi': 100})
    xlist = [
        clients_to_show[0],
        'Other {a} Features'.format(a=counter1 - num_feature)
    ] + diff_df.features.tolist()[:num_feature]
    ylist = [
        np.round(Feat_contrib.prediction[0], 6),
        np.round(diff_df.contrib[num_feature:].sum(), 6)
    ] + list(np.round(diff_df.contrib.tolist(), 6)[:num_feature])

    plot = waterfall_chart.plot(xlist,
                                ylist,
                                net_label=str(clients_to_show[1]),
                                rotation_value=90,
                                formatting='{:,.3f}')
    plot.show()
示例#4
0
def _savefig_to_uri(target):
    """Save ``target`` (anything with ``savefig``) as PNG and return it base64-encoded and URL-quoted; closes the current pyplot figure."""
    buf = io.BytesIO()
    target.savefig(buf, format="png", bbox_inches='tight')
    uri = urllib.parse.quote(base64.b64encode(buf.getvalue()))
    plt.close()
    return uri


def _wordcloud_uri(text, color_func=None):
    """Render a word cloud of ``text`` (optionally recoloured) and return it as an encoded PNG URI."""
    # NOTE(review): presumably WordCloud.generate fails on empty text, i.e.
    # when no tag passed the sentiment threshold -- confirm and handle upstream.
    wordcloud = WordCloud(background_color='white',
                          max_words=200,
                          max_font_size=80,
                          random_state=42).generate(text)
    if color_func is not None:
        wordcloud.recolor(color_func=color_func)
    plt.figure()
    plt.tight_layout()
    plt.imshow(wordcloud)
    plt.axis('off')
    return _savefig_to_uri(plt.gcf())


def printstudentreport(uid):
    """Build the four report charts for one student as encoded PNG URIs.

    Charts, in return order:
      1. Word cloud of this year's answer tags with compound sentiment > 0.3.
      2. Grey word cloud of this year's answer tags with compound sentiment < 0.
      3. Waterfall chart of the year-over-year change in summed sentiment.
      4. Line chart of average sentiment per half-year, one line per skill.

    Args:
        uid: Primary key of the ``User`` to report on.

    Returns:
        Tuple ``(uri2, uri3, uri4, uri5)`` of base64-encoded, URL-quoted PNGs.
    """
    student = User.objects.get(id=uid)

    # Collect the comma-separated tags of this year's answers.
    this_year = datetime.date.today().year
    tagged_words = []
    for ans in SkillAnswer.objects.filter(student=student):
        if ans.date.year == this_year:
            tagged_words.extend(ans.tags.split(','))

    # Score each tag once and bucket it by polarity.
    analyzer = SentimentIntensityAnalyzer()
    pos = []
    neg = []
    for word in tagged_words:
        compound = analyzer.polarity_scores(word)['compound']
        if compound > 0.3:
            pos.append(word)
        elif compound < 0.0:
            neg.append(word)

    uri2 = _wordcloud_uri(' '.join(pos))
    uri3 = _wordcloud_uri(' '.join(neg), color_func=grey_color_func)

    # Sum sentiment per calendar year, in chronological order.
    yearly_totals = {}
    for ans in SkillAnswer.objects.filter(student=student).order_by('date'):
        year = ans.date.year
        if year in yearly_totals:
            yearly_totals[year] += ans.sentiment
        else:
            yearly_totals[year] = ans.sentiment

    # A waterfall chart accumulates its inputs, so feed it the first total
    # followed by year-over-year differences. The differences are taken from
    # the untouched totals; differencing in place would subtract an
    # already-differenced value from the third year onwards.
    totals = list(yearly_totals.values())
    deltas = totals[:1] + [
        current - previous for previous, current in zip(totals, totals[1:])
    ]
    years = [float(year) for year in yearly_totals]
    print(deltas)
    print(years)
    uri4 = _savefig_to_uri(waterfall_chart.plot(years, deltas))

    # One line per skill: mean sentiment for Jan-Jun and Jul-Dec of each year.
    skill_names = []
    for skill in Skill.objects.all():
        # year -> [first-half sum, first-half count, second-half sum, second-half count]
        half_year = {}
        print(skill.skill_name)
        for q in SkillQuestion.objects.filter(skill=skill):
            answers = SkillAnswer.objects.filter(student=student)
            answers = answers.filter(question=q)
            for answer in answers:
                print(answer.answer)
                print(answer.date)
                buckets = half_year.setdefault(answer.date.year,
                                               [0.0, 0.0, 0.0, 0.0])
                offset = 0 if answer.date.month <= 6 else 2
                buckets[offset] += answer.sentiment
                buckets[offset + 1] += 1

        averages = {}
        for year in sorted(half_year):
            first_sum, first_n, second_sum, second_n = half_year[year]
            # A half-year with no answers plots as 0.0.
            averages["June" + str(year)] = (first_sum /
                                            first_n if first_n else 0.0)
            averages["Dec" + str(year)] = (second_sum /
                                           second_n if second_n else 0.0)
        plt.plot(list(averages.keys()), list(averages.values()))
        skill_names.append(skill.skill_name)
    plt.legend(skill_names, loc="lower right")

    plt.tight_layout()
    plt.xlabel('Time')
    plt.ylabel('Sentiment score')
    uri5 = _savefig_to_uri(plt.gcf())

    return uri2, uri3, uri4, uri5
示例#5
0
 def plot_feature_contribution(self):
     """Draw a waterfall chart of ``self.metric_score`` labelled by ``self.features``.

     Returns whatever ``waterfall_chart.plot`` returns.
     """
     return waterfall_chart.plot(self.features, self.metric_score)
示例#6
0
def plot_waterfall(Column, contributions, rotation_value=90, threshold=0.2, sorted_value=True, **kargs):
    """Call ``waterfall_chart.plot`` with this module's preferred defaults.

    Args:
        Column: Bar labels, passed as the first positional argument.
        contributions: Bar values, passed as the second positional argument.
        rotation_value: Forwarded as ``rotation_value`` (default 90).
        threshold: Forwarded as ``threshold`` (default 0.2).
        sorted_value: Forwarded as ``sorted_value`` (default True).
        **kargs: Any further keyword arguments, forwarded unchanged.

    Returns:
        Whatever ``waterfall_chart.plot`` returns.
    """
    options = {
        'rotation_value': rotation_value,
        'threshold': threshold,
        'sorted_value': sorted_value,
    }
    options.update(kargs)
    return waterfall_chart.plot(Column, contributions, **options)
示例#7
0
import waterfall_chart

# Bar labels, left to right. The "\n" breaks long labels over two lines.
measure_names = [
    'Reference',
    'Renewables',
    'LDV\nElectrification',
    'Heat\nPumps',
    'Other GHG\nReductions',
]

# Signed size of each bar. The last entry is the residual that makes the
# five quantities sum to about 86.2, the value the net bar ends on.
measure_quantities = [433, -103, -57, -27, 86.2 - 433 + (103 + 57 + 27)]

# The colour arguments are deliberately swapped relative to their names.
# NOTE(review): presumably so that reductions are drawn green -- confirm.
_chart_options = {
    'rotation_value': 0,
    'figsize': (7, 4),
    'net_label': '2050 Goal',
    'Title': 'CA GHG Emissions in 2050',
    'y_lab': r'MMT CO$_2$e',
    'green_color': 'red',
    'red_color': 'green',
    'formatting': '{:,.0f}',
}
plot = waterfall_chart.plot(measure_names, measure_quantities,
                            **_chart_options)

plot.ylim([0, 500])

plot.savefig('waterfall.pdf')