def general_explanation_using_skater(all_roles_scores, labels_training_set, labels_test_set, df_train_set, df_test_set, alpha):
    '''
    Compute global feature importance for an ordinal regression model
    using the skater interpretation framework.
    ----------------------------------------------------------------
    Params:
    all_roles_scores = list of all the marks present in test and train set for each role
    labels_training_set = labels of the training rows (subset of all_roles_scores)
    labels_test_set = labels of the test rows (kept for interface compatibility; not
                      needed for the global explanation)
    df_train_set = training feature DataFrame
    df_test_set = test feature DataFrame (kept for interface compatibility)
    alpha = regularization strength for the mord LogisticAT ordinal model
    Returns:
    skater feature-importance result, sorted ascending
    '''
    # Encode string labels into consecutive integers, as LogisticAT requires.
    le = preprocessing.LabelEncoder()
    le.fit(all_roles_scores)
    train_encoded_values = le.transform(labels_training_set)

    # Fit the ordinal logistic regression (all-threshold variant).
    model_ordinal = LogisticAT(alpha=alpha)
    model_ordinal.fit(df_train_set.values, train_encoded_values)

    # Wrap the fitted model so skater can query predicted probabilities,
    # then compute global feature importance over the training data.
    interpreter = Interpretation(df_train_set, feature_names=list(df_train_set.columns))
    model = InMemoryModel(model_ordinal.predict_proba, examples=df_train_set[:10])
    plots = interpreter.feature_importance.feature_importance(model, ascending=True)
    return plots
def locals_explanation_using_shap(mode, all_score, labels_training_set, labels_test_set, a, train_set, test_set, position, integral_test_set):
    '''
    Explain each individual test-set prediction of a mord ordinal model with SHAP.

    :param mode: 'save' to compute the shap values and cache them on disk,
                 anything else to load the previously cached values
    :param all_score: all the scores from train set and test set
    :param labels_training_set: labels of the training rows
    :param labels_test_set: labels of the test rows (kept for interface
                            compatibility; not used by the computation)
    :param a: alpha parameter for mord ordinal regression
    :param train_set: training feature DataFrame (scaled)
    :param test_set: test feature DataFrame (scaled)
    :param position: role name, used to build the cache file name
    :param integral_test_set: test set without robust scaler application, used to
                              report human-readable feature values
    :return: shap explainer
             list of shap values
             list of predictions from test set (encoded)
             list of real predictions from test set (presents also intervals)
             list of motivations (dict feature name -> unscaled value) per prediction
    '''
    # --- Model fitting and prediction (identical for both modes) ---
    le = preprocessing.LabelEncoder()
    le.fit(all_score)
    train_encoded_values = le.transform(labels_training_set)
    model_ordinal = LogisticAT(alpha=a)
    model_ordinal.fit(train_set.values, train_encoded_values)
    predictions = model_ordinal.predict(test_set)
    real_predictions = le.inverse_transform(predictions)

    # Kernel SHAP over the model's predicted probabilities.
    explainer = shap.KernelExplainer(model_ordinal.predict_proba, train_set)
    cache_path = "mord_shap_values_" + position + "without_ratings.txt"
    if mode == 'save':
        # Kernel SHAP is expensive: compute once and cache to disk.
        shap_values = explainer.shap_values(test_set)
        with open(cache_path, "wb") as fp:
            pickle.dump(shap_values, fp)
    else:
        # NOTE(review): pickle.load is unsafe on untrusted files; acceptable
        # here only because the cache is produced locally by 'save' mode.
        with open(cache_path, "rb") as fp:
            shap_values = pickle.load(fp)

    # --- Build a per-sample textual motivation ---
    list_of_explanation = []
    for inde in range(len(predictions)):
        # SHAP values for the class that was actually predicted for this sample.
        importance_list = shap_values[predictions[inde]][inde, :]
        # Keep features with a positive contribution of at least 0.01,
        # ordered by decreasing contribution.
        positive = {idx: val for idx, val in enumerate(importance_list) if val > 0}
        kept = [idx for idx, val in sorted(positive.items(), key=lambda x: x[1], reverse=True)
                if val >= 0.01]
        # Map feature name -> original (unscaled) value for readability.
        explanation = {train_set.columns[idx]: integral_test_set.iloc[inde, idx]
                       for idx in kept}
        list_of_explanation.append(explanation)
    return explainer, shap_values, predictions, real_predictions, list_of_explanation
stratify=y_age, random_state=seed)
# Persist the split indices so the exact train/test partition can be reproduced.
pd.DataFrame(idx_train).to_csv(
    os.path.join('Results_' + str(seed), 'train_' + sex + '.csv'))
pd.DataFrame(idx_test).to_csv(
    os.path.join('Results_' + str(seed), 'test_' + sex + '.csv'))

# ordinal logistic regression fit
# Fit an unregularized (alpha=0) ordinal logistic regression of grading on age
# for the current gender subset, then predict for every row of that subset.
model_ordinal = LogisticAT(alpha=0)
df_gender_train = df_gender.loc[idx_train]
model_ordinal.fit(df_gender_train[['age']].astype(int),
                  df_gender_train['grading'].astype(int))
df_overall.loc[df_gender.index, 'ordered_LR_prediction'] = model_ordinal.predict(
    df_gender[['age']])

# compute delta grading
# Residual between observed grading and the age-predicted grading.
# NOTE(review): f_ is defined outside this view — presumably it bins the
# residual into the final target variable; confirm against its definition.
df_overall['delta_grading_olr'] = df_overall['grading'] - df_overall[
    'ordered_LR_prediction']
df_overall['target_variable'] = df_overall['delta_grading_olr'].apply(f_)

# read indices
train_index_male = pd.read_csv(
# -*- coding: utf-8 -*- """ Created on Sun May 10 20:25:17 2020 @author: HO18971 """ from mord import LogisticAT from utilities import load_task, plot_olr import pandas as pd df_task = load_task('phenotype.csv') # CHANGE THE NAME OF YOUR PHENOTYPE FILE model_ordinal_m = LogisticAT(alpha=0) df_task_original_m = df_task[df_task['gender'] == 0] model_ordinal_m.fit(df_task_original_m[['age']].astype(int), df_task_original_m['grading'].astype(int)) y_pred_m = model_ordinal_m.predict(df_task_original_m[['age']]) df_task.loc[df_task_original_m.index, 'ordered_LR_prediction'] = y_pred_m model_ordinal_f = LogisticAT(alpha=0) df_task_original_f = df_task[df_task['gender'] == 1] model_ordinal_f.fit(df_task_original_f[['age']].astype(int), df_task_original_f['grading'].astype(int)) y_pred_f = model_ordinal_f.predict(df_task_original_f[['age']]) df_task.loc[df_task_original_f.index, 'ordered_LR_prediction'] = y_pred_f thresholds_m = model_ordinal_m.theta_ / model_ordinal_m.coef_ thresholds_f = model_ordinal_f.theta_ / model_ordinal_f.coef_ df_threshold = pd.DataFrame( {
# svc from sklearn import svm clf = svm.SVC() clf.fit(X_train, y_train) clf_pre_svm = clf.predict(X_test) # svr from sklearn import svm clf = svm.SVR() clf.fit(X_train, y_train) clf_pre_svr = clf.predict(X_test) # Threshold model from mord import LogisticAT logit = LogisticAT() logit.fit(X_train, y_train) clf_pre_LogisticAT = logit.predict(X_test) # Threshold model from mord import LogisticIT logit = LogisticIT() logit.fit(X_train, y_train) clf_pre_LogisticIT = logit.predict(X_test) # regression ordianl from mord import OrdinalRidge clf = OrdinalRidge() clf.fit(X_train, y_train) clf_pre_OrdinalRidge = clf.predict(X_test) # OR-FSVM (proposed approach)
]
# Drop the first column of each set — presumably a leftover index column from
# a CSV export; confirm against how train_data/test_data are loaded.
train_data = train_data.drop(train_data.columns[0], axis=1)
test_data = test_data.drop(test_data.columns[0], axis=1)

#CHECK SHAPE
print("shape: ", test_data.shape, train_data.shape)
#train_y = np.reshape(train_y.values,(-1,4))

#TRAIN MODELS
#DNN
# model = tflearn.DNN(network, tensorboard_verbose=0)
#model.fit(train_data.values, train_y, show_metric = True, batch_size=10)

#oc
# Three ordinal classifiers from mord, all fitted on the same encoded targets.
oc1 = LogisticAT()
oc2 = LogisticIT(alpha=0.1)
oc3 = LAD()
#oc = GradientBoostingClassifier(max_depth=3,n_estimators=350, learning_rate = 0.05,subsample=0.9, max_leaf_nodes=30000)
oc1.fit(train_data.values, train_y_oc)
oc2.fit(train_data.values, train_y_oc)
oc3.fit(train_data.values, train_y_oc)

#PREDICT
predictions_oc1 = oc1.predict(test_data.values)
predictions_oc2 = oc2.predict(test_data.values)
predictions_oc3 = oc3.predict(test_data.values)
#predictions_dnn = model.predict(test_data.values)
#predictions_dnn = [item for sublist in predictions_dnn for item in sublist]

#avg
#predictions = np.mean([predictions_oc, predictions_dnn], axis = 0)

#PREDICTIONS AND LABELS
#for p, t in zip(predictions, test_y.values):
#    print('Test predictions: {}, Truth: {}'.format(p, t))