def general_explanation_using_skater(all_roles_scores, labels_training_set,
                                     labels_test_set, df_train_set,
                                     df_test_set, alpha):
    '''
    Compute global feature importances of an ordinal regression model with
    the Skater interpretation framework.

    A LogisticAT model is fitted on the training data and its predict_proba
    is handed to Skater, which computes the importances on df_train_set.

    ----------------------------------------------------------------
    Params:
        all_roles_scores = every label that can occur in the train and test
                           sets; used to fit the label encoder
        labels_training_set = labels of the training rows
        labels_test_set = not used; kept so existing callers keep working
        df_train_set = training features (DataFrame); also the data Skater
                       computes the importances on
        df_test_set = not used; kept so existing callers keep working
        alpha = regularisation parameter passed to LogisticAT

    Returns:
        the result of Skater's feature_importance call, requested in
        ascending order
    '''
    # Encode the labels as consecutive integers before fitting.
    le = preprocessing.LabelEncoder()
    le.fit(all_roles_scores)
    train_encoded_values = le.transform(labels_training_set)

    model_ordinal = LogisticAT(alpha=alpha)
    model_ordinal.fit(df_train_set.values, train_encoded_values)

    interpreter = Interpretation(df_train_set,
                                 feature_names=list(df_train_set.columns))

    # Skater wraps the prediction function; the first ten training rows are
    # passed as example inputs.
    model = InMemoryModel(model_ordinal.predict_proba,
                          examples=df_train_set[:10])

    return interpreter.feature_importance.feature_importance(model,
                                                             ascending=True)
def _explain_prediction(importance_list, feature_names, raw_values, row,
                        min_importance=0.01):
    '''
    Map the features that pushed one prediction upwards to their raw values.

    :param importance_list: SHAP values of one test row for its predicted class
    :param feature_names: column labels, positionally aligned with importance_list
    :param raw_values: DataFrame holding the unscaled feature values
    :param row: positional index of the test row inside raw_values
    :param min_importance: smallest SHAP value that is still reported
    :return: dict {feature name: raw value}, ordered by decreasing SHAP value
    '''
    relevant = [(column, value)
                for column, value in enumerate(importance_list)
                if value >= min_importance]
    # list.sort is stable, so equally important features keep column order.
    relevant.sort(key=lambda item: item[1], reverse=True)
    return {feature_names[column]: raw_values.iloc[row, column]
            for column, _ in relevant}


def locals_explanation_using_shap(mode, all_score, labels_training_set,
                                  labels_test_set, a, train_set, test_set,
                                  position, integral_test_set):
    '''
    Explain every test-set prediction of an ordinal regression model with
    SHAP's KernelExplainer.

    The model is always re-fitted; only the SHAP values are cached on disk.

    :param mode: 'save' computes the SHAP values and pickles them; any other
                 value loads the previously pickled values instead
    :param all_score: all the scores from train set and test set, used to fit
                      the label encoder
    :param labels_training_set: labels of the training rows
    :param labels_test_set: not used; kept so existing callers keep working
    :param a: alpha parameter for mord ordinal regression
    :param train_set: training features (DataFrame); also the background data
                      given to the explainer
    :param test_set: test features to predict and explain
    :param position: string inserted into the cache file name
    :param integral_test_set: test set without robust scaler application;
                              its values are the ones reported in the
                              explanations
    :return:
            shap explainer
            list of shap values
            list of predictions from test set (encoded)
            list of real predictions from test set (decoded labels)
            list of explanations, one dict {feature name: raw value} per
            prediction, holding the features whose SHAP value for the
            predicted class is at least 0.01
    '''
    le = preprocessing.LabelEncoder()
    le.fit(all_score)
    train_encoded_values = le.transform(labels_training_set)

    model_ordinal = LogisticAT(alpha=a)
    model_ordinal.fit(train_set.values, train_encoded_values)
    predictions = model_ordinal.predict(test_set)
    real_predictions = le.inverse_transform(predictions)

    explainer = shap.KernelExplainer(model_ordinal.predict_proba, train_set)

    cache_path = "mord_shap_values_" + position + "without_ratings.txt"
    if mode == 'save':
        # explain all the predictions in the test set
        shap_values = explainer.shap_values(test_set)
        with open(cache_path, "wb") as fp:
            pickle.dump(shap_values, fp)
    else:
        # NOTE: unpickling can execute arbitrary code; only load cache files
        # that were written by this function.
        with open(cache_path, "rb") as fp:
            shap_values = pickle.load(fp)

    # shap_values is indexed as shap_values[class][row, feature].
    # NOTE(review): other SHAP releases may return a different layout for
    # multi-output models - confirm against the installed version.
    feature_names = train_set.columns
    list_of_explanation = [
        _explain_prediction(shap_values[predicted_class][row, :],
                            feature_names, integral_test_set, row)
        for row, predicted_class in enumerate(predictions)
    ]

    return explainer, shap_values, predictions, real_predictions, list_of_explanation
Пример #3
0
            os.path.join('Results_' + str(seed), 'train_' + sex + '.csv'))

        pd.DataFrame(idx_test).to_csv(
            os.path.join('Results_' + str(seed), 'test_' + sex + '.csv'))

        # Ordinal logistic regression of grading on age, fitted on the
        # training rows of this gender subset only.

        model_ordinal = LogisticAT(alpha=0)

        df_gender_train = df_gender.loc[idx_train]

        model_ordinal.fit(df_gender_train[['age']].astype(int),
                          df_gender_train['grading'].astype(int))

        # Predict for every row of the subset (not just the training rows)
        # and write the result back into the overall frame.
        df_overall.loc[df_gender.index,
                       'ordered_LR_prediction'] = model_ordinal.predict(
                           df_gender[['age']])

    # Delta grading: observed grading minus the ordinal-regression prediction.

    df_overall['delta_grading_olr'] = df_overall['grading'] - df_overall[
        'ordered_LR_prediction']

    # f_ is defined outside this excerpt; it maps each delta to the target.
    df_overall['target_variable'] = df_overall['delta_grading_olr'].apply(f_)

    # Read back the 'index' column of the per-sex training CSVs stored in
    # the results directory of this seed.

    train_index_male = pd.read_csv(
        os.path.join('Results_' + str(seed), 'train_male.csv'))['index']

    train_index_female = pd.read_csv(
        os.path.join('Results_' + str(seed), 'train_female.csv'))['index']
from sklearn.metrics import accuracy_score

# Load the dataset from its CSV file.
wvs = pd.read_csv(
    "C:/Datasets_BA/360DigiTMG/DS_India/360DigiTMG DS India Module wise PPTs/Module 10b Ordinal Logistic Regression/wvs.csv"
)
wvs.head()

# Quick exploratory look (these expressions only show output interactively).
wvs.describe()
wvs.columns

# Integer-encode the categorical columns, re-fitting one encoder per column.
# NOTE(review): LabelEncoder numbers classes in sorted label order - confirm
# that this matches the intended ordinal order of the target.
lb = LabelEncoder()
for column in ("poverty", "religion", "degree", "country", "gender"):
    wvs[column] = lb.fit_transform(wvs[column])

from mord import LogisticAT

# The first column is the target, every remaining column is a predictor.
# alpha=0 switches regularisation off.
model = LogisticAT(alpha=0)
model.fit(wvs.iloc[:, 1:], wvs.iloc[:, 0])
model.coef_
model.classes_

# Predictions on the same rows the model was trained on.
predict = model.predict(wvs.iloc[:, 1:])

# Training accuracy.
accuracy_score(wvs.iloc[:, 0], predict)
Created on Sun May 10 20:25:17 2020

@author: HO18971
"""

from mord import LogisticAT
from utilities import load_task, plot_olr
import pandas as pd

df_task = load_task('phenotype.csv')  # CHANGE THE NAME OF YOUR PHENOTYPE FILE

# Rows with gender == 0 (the "_m" group): regress grading on age and store
# the predictions back in df_task.
df_task_original_m = df_task[df_task['gender'] == 0]
model_ordinal_m = LogisticAT(alpha=0).fit(
    df_task_original_m[['age']].astype(int),
    df_task_original_m['grading'].astype(int),
)
y_pred_m = model_ordinal_m.predict(df_task_original_m[['age']])
df_task.loc[df_task_original_m.index, 'ordered_LR_prediction'] = y_pred_m

# Rows with gender == 1 (the "_f" group): same procedure with its own model.
# The subset is taken after the column above was added, as before.
df_task_original_f = df_task[df_task['gender'] == 1]
model_ordinal_f = LogisticAT(alpha=0).fit(
    df_task_original_f[['age']].astype(int),
    df_task_original_f['grading'].astype(int),
)
y_pred_f = model_ordinal_f.predict(df_task_original_f[['age']])
df_task.loc[df_task_original_f.index, 'ordered_LR_prediction'] = y_pred_f

# Cut points divided by the age coefficient, one set per group.
thresholds_m = model_ordinal_m.theta_ / model_ordinal_m.coef_
thresholds_f = model_ordinal_f.theta_ / model_ordinal_f.coef_

df_threshold = pd.DataFrame(
    {
        'male': thresholds_m,
Пример #6
0
# Support vector classifier
from sklearn import svm
clf = svm.SVC()
clf_pre_svm = clf.fit(X_train, y_train).predict(X_test)

# Support vector regressor
clf = svm.SVR()
clf_pre_svr = clf.fit(X_train, y_train).predict(X_test)

# Threshold model (LogisticAT)
from mord import LogisticAT
logit = LogisticAT()
clf_pre_LogisticAT = logit.fit(X_train, y_train).predict(X_test)

# Threshold model (LogisticIT)
from mord import LogisticIT
logit = LogisticIT()
clf_pre_LogisticIT = logit.fit(X_train, y_train).predict(X_test)

# Ordinal ridge regression
from mord import OrdinalRidge
clf = OrdinalRidge()
clf_pre_OrdinalRidge = clf.fit(X_train, y_train).predict(X_test)

# OR-FSVM (proposed approach)
Rank_score = RankFun(X_train, X_test, y_train)
Пример #7
0
        # Log the shapes of the test and train feature sets (test first).
        print("shape: ", test_data.shape, train_data.shape)
        #train_y = np.reshape(train_y.values,(-1,4))
        #TRAIN MODELS
        #DNN
        # model =  tflearn.DNN(network, tensorboard_verbose=0)
        #model.fit(train_data.values, train_y,  show_metric = True, batch_size=10)
        # Three estimators, all fitted on the same features and the same
        # train_y_oc target (presumably the ordinal-coded labels - confirm
        # where train_y_oc is built).
        oc1 = LogisticAT()
        oc2 = LogisticIT(alpha=0.1)
        oc3 = LAD()
        #oc = GradientBoostingClassifier(max_depth=3,n_estimators=350, learning_rate = 0.05,subsample=0.9, max_leaf_nodes=30000)
        oc1.fit(train_data.values, train_y_oc)
        oc2.fit(train_data.values, train_y_oc)
        oc3.fit(train_data.values, train_y_oc)
        # Predict on the test features with each fitted estimator.
        predictions_oc1 = oc1.predict(test_data.values)
        predictions_oc2 = oc2.predict(test_data.values)
        predictions_oc3 = oc3.predict(test_data.values)

        #predictions_dnn = model.predict(test_data.values)
        #predictions_dnn = [item for sublist in predictions_dnn for item in sublist]
        #avg
        #predictions = np.mean([predictions_oc, predictions_dnn], axis = 0)
        #PREDICTIONS AND LABELS
        #for p, t in zip(predictions, test_y.values):
        #    print('Test predictions: {}, Truth: {}'.format(p, t))

        #CORRELATIONS OF EACH MODEL AND AVG. CORR.
        #corr_avg = pearsonr(predictions, test_y.values)
        #print("Corr. Avg: ", corr_avg[0])
        #correlations_avg.append(corr_avg[0])