Example #1
def train_explainer(regressor: LogisticRegression, feature_names: List[str], X_train: np.ndarray, X_test: np.ndarray, y_test: np.ndarray):
    predict_fn = lambda x: regressor.predict(x)

    explainer = AnchorTabular(predict_fn, feature_names)
    explainer.fit(X_train)

    file_path=""
    with open("explainer.dill", "wb") as file:
        dill.dump(explainer, file)
        file_path = file.name

    mlflow.log_artifact("explainer.dill", "model")

    print(np.where(y_test == 1)[0])
    probe = np.array([40.316667556762695, 0.5605325219195545, 0.350, 0, 3, 1, 5], dtype=float)
    #probe = np.array(X_test[700], dtype=float)
    explanation = explainer.explain(probe)

    print('Anchor: %s' % (' AND '.join(explanation['names'])))
    print('Precision: %.2f' % explanation['precision'])
    print('Coverage: %.2f' % explanation['coverage'])
    print(explanation)
    return explainer

# kedro install
# kedro run
# kedro viz
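For completeness, a minimal sketch of reloading the explainer logged above from MLflow (assuming MLflow 2.x; the run ID below is a placeholder for the run that logged model/explainer.dill):

import dill
import mlflow

# download the artifact logged by train_explainer above; "<run_id>" is a placeholder
local_path = mlflow.artifacts.download_artifacts(
    run_id="<run_id>", artifact_path="model/explainer.dill")
with open(local_path, "rb") as file:
    explainer = dill.load(file)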
Example #2
def atab_explainer(lr_classifier, adult_data):
    predictor = predict_fcn(predict_type='class',
                            clf=lr_classifier,
                            preproc=adult_data['preprocessor'])
    atab = AnchorTabular(
        predictor=predictor,
        feature_names=adult_data['metadata']['feature_names'],
        categorical_names=adult_data['metadata']['category_map'])
    atab.fit(adult_data['X_train'], disc_perc=(25, 50, 75))
    return atab
Example #3
def train_explainer(artifacts_folder: str, data: AdultData, model: RandomForestClassifier) -> AnchorTabular:
    def predict_fn(x):
        return model.predict(x)

    explainer = AnchorTabular(predict_fn, data.feature_names, categorical_names=data.category_map, seed=1)
    explainer.fit(data.X_train, disc_perc=(25, 50, 75))
    with open(f"{artifacts_folder}/{EXPLAINER_FOLDER}" + "/explainer.dill", "wb") as f:
        explainer.predictor = None
        explainer.samplers[0].predictor = None
        dill.dump(explainer, f)
    return explainer
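Because the predictors are stripped before serialization, they must be reattached after loading. A minimal counterpart sketch, assuming the same fitted model is available at load time (load_explainer is a hypothetical name, not part of the code above):

def load_explainer(artifacts_folder: str, model: RandomForestClassifier) -> AnchorTabular:
    # mirror of train_explainer: restore the predictors that were set to None before dill.dump
    with open(f"{artifacts_folder}/{EXPLAINER_FOLDER}/explainer.dill", "rb") as f:
        explainer = dill.load(f)
    explainer.predictor = model.predict
    explainer.samplers[0].predictor = model.predict
    return explainer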
Example #4
class Anchors(FeatureImportance):
    """
    Feature importance method by [RIB]_.

    References
    ----------
    .. [RIB] Ribeiro, et al, "Anchors: High-precision model-agnostic explanations",
     Proceedings of the AAAI Conference on Artificial Intelligence, Volume 32, 2018.

    """

    def __init__(self, model: Any, seed: int = SEED):
        super().__init__(seed=seed)
        self._model = assign_model(model=model)
        self._explainer = None

    def fit(self, X: Any) -> None:
        self._explainer = AnchorTabular(
            predictor=self._model.predict_proba,
            feature_names=list(range(X.shape[1])), seed=self._seed)
        self._explainer.fit(train_data=X)
        # disc_perc=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9))
        # disc_perc=(0.1, 0.3, 0.5, 0.7, 0.9))
        # disc_perc=(0.2, 0.4, 0.6, 0.8))

    def _compute_anchors_per_sample(self, X: np.ndarray, idx: int) -> List:
        result = self._explainer.explain(X=X[idx, :])
        return result.data['raw']['feature']

    @staticmethod
    def _calculate_importance(anchors: List, output_shape: Tuple) -> np.ndarray:
        importance = np.zeros(shape=output_shape)
        for k, anchor in enumerate(anchors):
            if isinstance(anchor, list):
                importance[k, anchor] = 1
            else:
                importance[anchor] = 1
        return importance

    def _compute_anchors(self, X: np.ndarray, num_jobs: int) -> List:
        return Parallel(n_jobs=num_jobs)(
            delayed(self._compute_anchors_per_sample)(X, sample_idx)
            for sample_idx in range(X.shape[0]))

    def explain(self, X: np.ndarray, sample_idx: int) -> np.ndarray:
        anchors = self._compute_anchors_per_sample(X=X, idx=sample_idx)
        return self._calculate_importance(anchors=anchors, output_shape=(X.shape[1],))

    def explain_batch(self, X: np.ndarray, num_jobs: int = 2) -> np.ndarray:
        anchors = self._compute_anchors(X=X, num_jobs=num_jobs)
        return self._calculate_importance(anchors=anchors, output_shape=X.shape)
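A hedged usage sketch of the wrapper above; it assumes assign_model simply wraps a fitted scikit-learn classifier exposing predict_proba (as the constructor's use of self._model.predict_proba suggests) and that SEED is defined in the surrounding module:

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

X, y = load_iris(return_X_y=True)
clf = RandomForestClassifier(random_state=0).fit(X, y)

anchors = Anchors(model=clf)                 # hypothetical usage, depends on assign_model
anchors.fit(X)
single = anchors.explain(X, sample_idx=0)    # 0/1 importance vector of shape (n_features,)
batch = anchors.explain_batch(X[:5])         # one 0/1 row per explained sample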
Example #5
def at_iris_explainer(get_iris_dataset, rf_classifier, request):
    """
    Instantiates and fits an AnchorTabular explainer for the Iris dataset.
    """

    predict_type = request.param
    data = get_iris_dataset
    clf, _ = rf_classifier  # preprocessor not necessary

    # instantiate and fit explainer
    pred_fn = predict_fcn(predict_type, clf)
    explainer = AnchorTabular(pred_fn, data['metadata']['feature_names'])
    explainer.fit(data['X_train'], disc_perc=(25, 50, 75))

    return data['X_test'], explainer, pred_fn, predict_type
Example #6
def at_adult_explainer(get_adult_dataset, rf_classifier, request):
    """
    Instantiates and fits an AnchorTabular explainer for the Adult dataset.
    """

    # fit random forest classifier
    predict_type = request.param
    data = get_adult_dataset
    clf, preprocessor = rf_classifier

    # instantiate and fit explainer
    pred_fn = predict_fcn(predict_type, clf, preprocessor)
    explainer = AnchorTabular(
        pred_fn,
        data['metadata']['feature_names'],
        categorical_names=data['metadata']['category_map'])
    explainer.fit(data['X_train'], disc_perc=(25, 50, 75))

    return data['X_test'], explainer, pred_fn, predict_type
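Both fixtures follow the same pattern and read their prediction type from request.param, so they are meant to be parametrized indirectly. A minimal sketch of a test consuming the Iris fixture (only the fixture contract comes from the code above; the test body and the availability of the get_iris_dataset and rf_classifier fixtures are assumptions):

import pytest

@pytest.mark.parametrize('at_iris_explainer', ['class'], indirect=True)
def test_iris_anchor(at_iris_explainer):
    X_test, explainer, pred_fn, predict_type = at_iris_explainer
    explanation = explainer.explain(X_test[0], threshold=0.95)
    assert isinstance(explanation.anchor, list)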
Example #7
    def fit(self, x, y):

        self.dim = x.shape[1]

        # clf = sklearn.svm.SVC(kernel=self.kernel, probability=True)
        clf = RandomForestClassifier()
        clf.fit(x, y)

        y_pred = clf.predict(x)
        print("Clf model accuracy: [{:.4f}]".format(
            sklearn.metrics.accuracy_score(y, y_pred)))

        self.ano_idx = np.where(y == 1)[0]
        print(self.ano_idx.shape)

        n_f = x.shape[1]
        feature_names = ["A" + str(i) for i in range(n_f)]
        # use anchor
        predict_fn = lambda xx: clf.predict_proba(xx)
        explainer = AnchorTabular(predict_fn, feature_names)
        explainer.fit(x, disc_perc=(25, 50, 75))

        exp_sub_lst = []
        for i in tqdm(range(len(self.ano_idx))):
            ano = x[self.ano_idx[i]]
            explanation = explainer.explain(ano, threshold=0.95)
            anchor = explanation['anchor']
            f_sub = []
            for a in anchor:
                for item in a.split(" "):
                    if item.startswith("A"):
                        item = int(item[1:])
                        f_sub.append(item)
            # print(anchor, f_sub)
            if len(f_sub) == 0:
                f_sub = np.arange(n_f)
            exp_sub_lst.append(f_sub)

        return exp_sub_lst
Example #8
def make_anchor_tabular(dirname: Optional[Path] = None) -> AnchorTabular:
    # train model
    iris_data = load_iris()

    clf = LogisticRegression(solver="liblinear", multi_class="ovr")
    clf.fit(iris_data.data, iris_data.target)

    # create explainer
    explainer = AnchorTabular(clf.predict,
                              feature_names=iris_data.feature_names)
    explainer.fit(iris_data.data, disc_perc=(25, 50, 75))

    if dirname is not None:
        explainer.save(dirname)
    return explainer
Example #9
    def retrain_classifier_final(self, args, nn_model_ref):
        nn_model_ref.epochs = args.num_epch_2
        nn_model_ref.batch_size_2 = args.batch_size_2
        nn_model_ref.net.freeze()
        X_train_proba_feat, X_eval_proba_feat = nn_model_ref.all_intermediaire, nn_model_ref.all_intermediaire_val
        Y_train_proba = nn_model_ref.Y_train_nn_binaire
        Y_eval_proba = nn_model_ref.Y_val_nn_binaire
        print("START RETRAIN LINEAR NN GOHR ")
        print()
        """net_retrain, h = train_speck_distinguisher(args, X_train_proba_feat.shape[1], X_train_proba_feat,
                                                   Y_train_proba, X_eval_proba_feat, Y_eval_proba,
                                                   bs=args.batch_size_2,
                                                   epoch=args.num_epch_2, name_ici="retrain_nn_gohr",
                                                   wdir=self.path_save_model)"""

        from alibi.explainers import AnchorTabular
        #from alibi.explainers import AnchorImage
        from sklearn.ensemble import RandomForestClassifier

        clf = RandomForestClassifier(n_estimators=50)
        clf.fit(X_train_proba_feat, Y_train_proba)
        predict_fn = lambda x: clf.predict_proba(x)
        feature_names = [i for i in range(X_train_proba_feat.shape[1])]
        explainer = AnchorTabular(predict_fn, feature_names)
        idx = 0
        explainer.fit(X_train_proba_feat, disc_perc=(25,))
        print('Prediction: ',
              explainer.predictor(X_eval_proba_feat[idx].reshape(1, -1))[0])

        #print('Prediction: ', explainer.predict_fn(X_eval_proba_feat[idx].reshape(1, -1))[0])
        explanation = explainer.explain(X_eval_proba_feat[idx], threshold=0.8)
        print('Anchor: %s' % (' AND '.join(explanation['names'])))
        print('Precision: %.2f' % explanation['precision'])
        print('Coverage: %.2f' % explanation['coverage'])

        # net_retrain is produced by the commented-out train_speck_distinguisher call above
        return net_retrain
Example #10
import numpy as np
from sklearn.datasets import load_iris
from alibi.explainers import AnchorTabular

import requests

dataset = load_iris()
feature_names = dataset.feature_names
iris_data = dataset.data

model_url = "http://localhost:8003/seldon/seldon/iris/api/v1.0/predictions"


def predict_fn(X):
    data = {"data": {"ndarray": X.tolist()}}
    r = requests.post(model_url, json=data)
    return np.array(r.json()["data"]["ndarray"])


explainer = AnchorTabular(predict_fn, feature_names)
explainer.fit(iris_data, disc_perc=(25, 50, 75))

explainer.save("./explainer/")
Example #11
    clf = DecisionTreeClassifier(random_state=42)
else:
    clf = RandomForestClassifier(random_state=42)
# st.sidebar.write(selected_model)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
st.write("""### Metrics:""")
st.write('Train accuracy:', accuracy_score(y_train,clf.predict(X_train)))
st.write('Test accuracy:', accuracy_score(y_test, clf.predict(X_test)))
confusion_matrix(y_test, y_pred)
st.write('Confusion matrix:')
plot_confusion_matrix(clf, X_test, y_test)
st.pyplot()
# st.write(classification_report(y_test, y_pred))
predict_fn = lambda x: clf.predict_proba(x)
explainer = AnchorTabular(predict_fn, feature_names)
explainer.fit(X_train)
idx = st.sidebar.slider(label='Select an instance:',min_value=1,max_value=len(y_test))
st.write("""### Selected instance:""")
st.write(X_test_df.iloc[[idx-1]], height=150)
print(y_train_df.iloc[[idx-1]])
st.write('Prediction: ', class_names[explainer.predictor(X_test[idx-1].reshape(1, -1))[0]])
st.write("""### Prediction Explained:""")
with st.spinner('Calculating'):
    explanation = explainer.explain(X_test[idx-1], threshold=0.70)
    st.write('Anchor (instance explanation): %s' % (' AND '.join(explanation.anchor)))
    st.write('Precision: %.2f' % explanation.precision)
    st.write('Coverage: %.2f' % explanation.coverage)
# st.write("""### Trust score:""")
    ts = TrustScore(k_filter=10,
                alpha=.05,
Example #12
def make_anchor_tabular_income(
        dirname: Optional[Path] = None) -> AnchorTabular:
    # adapted from:
    # https://docs.seldon.io/projects/alibi/en/latest/examples/anchor_tabular_adult.html
    np.random.seed(0)

    # prepare data
    adult = fetch_adult()
    data = adult.data
    target = adult.target
    feature_names = adult.feature_names
    category_map = adult.category_map

    data_perm = np.random.permutation(np.c_[data, target])
    data = data_perm[:, :-1]
    target = data_perm[:, -1]

    # build model
    idx = 30000
    X_train, Y_train = data[:idx, :], target[:idx]
    X_test, Y_test = data[idx + 1:, :], target[idx + 1:]

    ordinal_features = [
        x for x in range(len(feature_names))
        if x not in list(category_map.keys())
    ]
    ordinal_transformer = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ])

    categorical_features = list(category_map.keys())
    categorical_transformer = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ])

    preprocessor = ColumnTransformer(transformers=[
        ("num", ordinal_transformer, ordinal_features),
        ("cat", categorical_transformer, categorical_features),
    ])

    clf = RandomForestClassifier(n_estimators=50)

    model_pipeline = Pipeline(steps=[
        ("preprocess", preprocessor),
        ("classifier", clf),
    ])

    model_pipeline.fit(X_train, Y_train)

    explainer = AnchorTabular(model_pipeline.predict,
                              feature_names,
                              categorical_names=category_map,
                              seed=1)

    explainer.fit(X_train, disc_perc=[25, 50, 75])

    if dirname is not None:
        explainer.save(dirname)
    return explainer
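A short usage sketch of the factory above; it re-fetches the Adult dataset only to have an instance to explain (this mirrors the alibi Adult example the function is adapted from rather than anything guaranteed by the code itself):

explainer = make_anchor_tabular_income()
adult = fetch_adult()
explanation = explainer.explain(adult.data[0], threshold=0.95)
print('Anchor: %s' % (' AND '.join(explanation.anchor)))
print('Precision: %.2f' % explanation.precision)
print('Coverage: %.2f' % explanation.coverage)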
Example #13
 def fit(self, X: Any) -> None:
     self._explainer = AnchorTabular(
         predictor=self._model.predict_proba,
         feature_names=list(range(X.shape[1])), seed=self._seed)
     self._explainer.fit(train_data=X)
Example #14
    def anchors_connector(self, *arg):
        query_instance = dict(s.split(':') for s in arg)

        #anchor instance to model instance. Input: Numpy. Output: Pandas df. Turns numbers into categories.
        def adapter(n):
            d = pd.DataFrame(data=n, columns=self.featureNames)
            categories = self.getCategoricalFeatures()
            for c in categories:
                d[c] = d[c].map(self.dictionary[c]["values"])
            #d['Sex'] = d['Sex'].map({0:'Male', 1: 'Female'})
            #d['Embarked'] = d['Embarked'].map({0: 'Southampton', 1: 'Cherbourg', 2: 'Queenstown'})
            #d['Pclass'] = d['Pclass'].map({0: 'First', 1: 'Second', 2: 'Third'})
            return d

        #model instance to anchor instance. Input: Pandas df. Output: Numpy. Turns categories into numbers.
        def reverse_adapter(p):
            d = p.copy()
            categories = self.getCategoricalFeatures()
            for c in categories:
                d[c] = d[c].map(
                    {v: k
                     for k, v in self.dictionary[c]["values"].items()})
            #d['Sex'] = d['Sex'].map({'Male': 0, 'Female': 1})
            #d['Embarked'] = d['Embarked'].map({'Southampton': 0, 'Cherbourg': 1, 'Queenstown': 2})
            #d['Pclass'] = d['Pclass'].map({'First': 0, 'Second': 1, 'Third': 2})
            n = d.to_numpy().astype(float)
            return n

        predict_fn = lambda x: self.model.predict(adapter(x))

        #create the category map
        categories = self.getCategoricalFeatures()
        category_map = {}
        for i in range(len(self.featureNames)):
            if self.featureNames[i] in categories:
                category_map[i] = [
                    str(k) for k in list(self.dictionary[self.featureNames[i]]
                                         ["values"].values())
                ]
        #category_map = {0: ['First', 'Second', 'Third'], 1: ['Male','Female'], 4: ['Southampton', 'Cherbourg', 'Queenstown']}

        print("-------")
        print(query_instance)
        print(reverse_adapter(pd.DataFrame([query_instance])))

        #sort query_instance
        sorted_query_instance = {}
        for f in self.featureNames:
            sorted_query_instance[f] = query_instance[f]

        print(sorted_query_instance)
        print(reverse_adapter(pd.DataFrame([sorted_query_instance])))

        explainer = AnchorTabular(predict_fn,
                                  feature_names=self.featureNames,
                                  categorical_names=category_map)
        anchor_training = reverse_adapter(self.X_train)
        explainer.fit(anchor_training, disc_perc=[25, 50, 75])
        explanation = explainer.explain(
            reverse_adapter(pd.DataFrame([sorted_query_instance])),
            threshold=0.90,
            max_anchor_size=3,
            batch_size=2000)
        print('Anchor: %s' % (' AND '.join(explanation['data']['anchor'])))
        print('Precision: %.2f' % explanation['precision'])
        print('Coverage: %.2f' % explanation['coverage'])

        #build rule
        rule = ""
        names = explanation['data']['anchor']
        precision = np.asarray(explanation['raw']['precision'])
        precision[1:] -= precision[:-1].copy()
        precision = [round(elem, 2) for elem in precision.tolist()]
        for i in range(0, len(names)):
            rule = rule + names[i]
            importance = round(precision[i] / sum(precision) * 100, 2)

            rule = rule + " (" + str(importance) + "%)"
            if (i < len(names) - 1):
                rule = rule + " AND "

        self.explanation = 'I generated the following rule for you. It describes the boundaries under which the current prediction remains stable: <br> <br> <big>' + rule + '</big>. <br> <br> Each rule condition has an importance score which shows how critical the condition is for the prediction outcome to stay stable.'
        self.certainty = 'I tested the rule on many sample data instances. The condition applies on %.2f' % explanation[
            'coverage'] + ' of the instances. In these cases, the rule was accurate in %.2f' % explanation[
                'precision'] + ' of the cases.'
        return True
Example #15
def main(unused_args):

    # Read hyperparameter tuning values from file
    if args.component == 'training':
        timestamp = str(args.timestamp)

        filename = "/mnt/Model_Blerssi/hpv-" + timestamp + ".txt"
        with open(filename, "r") as f:
            args.tf_batch_size = int(f.readline())
            args.learning_rate = float(f.readline())
        print("****************")
        print("Optimized Hyper paramater value")
        print("Batch-size = " + str(args.tf_batch_size))
        print("Learning rate = " + str(args.learning_rate))
        print("****************")

    # Feature columns
    COLUMNS = list(BLE_RSSI.columns)
    FEATURES = COLUMNS[2:]
    LABEL = [COLUMNS[0]]

    b3001 = tf.feature_column.numeric_column(key='b3001', dtype=tf.float64)
    b3002 = tf.feature_column.numeric_column(key='b3002', dtype=tf.float64)
    b3003 = tf.feature_column.numeric_column(key='b3003', dtype=tf.float64)
    b3004 = tf.feature_column.numeric_column(key='b3004', dtype=tf.float64)
    b3005 = tf.feature_column.numeric_column(key='b3005', dtype=tf.float64)
    b3006 = tf.feature_column.numeric_column(key='b3006', dtype=tf.float64)
    b3007 = tf.feature_column.numeric_column(key='b3007', dtype=tf.float64)
    b3008 = tf.feature_column.numeric_column(key='b3008', dtype=tf.float64)
    b3009 = tf.feature_column.numeric_column(key='b3009', dtype=tf.float64)
    b3010 = tf.feature_column.numeric_column(key='b3010', dtype=tf.float64)
    b3011 = tf.feature_column.numeric_column(key='b3011', dtype=tf.float64)
    b3012 = tf.feature_column.numeric_column(key='b3012', dtype=tf.float64)
    b3013 = tf.feature_column.numeric_column(key='b3013', dtype=tf.float64)
    feature_columns = [
        b3001, b3002, b3003, b3004, b3005, b3006, b3007, b3008, b3009, b3010,
        b3011, b3012, b3013
    ]

    df_full = pd.read_csv("/opt/iBeacon_RSSI_Labeled.csv")  #Labeled dataset

    # Input Data Preprocessing
    df_full = df_full.drop(['date'], axis=1)
    df_full[FEATURES] = (df_full[FEATURES]) / (-200)

    #Output Data Preprocessing
    # mapping from location label to integer class index
    location_map = {
        'O02': 0,
        'P01': 1,
        'P02': 2,
        'R01': 3,
        'R02': 4,
        'S01': 5,
        'S02': 6,
        'T01': 7,
        'U02': 8,
        'U01': 9,
        'J03': 10,
        'K03': 11,
        'L03': 12,
        'M03': 13,
        'N03': 14,
        'O03': 15,
        'P03': 16,
        'Q03': 17,
        'R03': 18,
        'S03': 19,
        'T03': 20,
        'U03': 21,
        'U04': 22,
        'T04': 23,
        'S04': 24,
        'R04': 25,
        'Q04': 26,
        'P04': 27,
        'O04': 28,
        'N04': 29,
        'M04': 30,
        'L04': 31,
        'K04': 32,
        'J04': 33,
        'I04': 34,
        'I05': 35,
        'J05': 36,
        'K05': 37,
        'L05': 38,
        'M05': 39,
        'N05': 40,
        'O05': 41,
        'P05': 42,
        'Q05': 43,
        'R05': 44,
        'S05': 45,
        'T05': 46,
        'U05': 47,
        'S06': 48,
        'R06': 49,
        'Q06': 50,
        'P06': 51,
        'O06': 52,
        'N06': 53,
        'M06': 54,
        'L06': 55,
        'K06': 56,
        'J06': 57,
        'I06': 58,
        'F08': 59,
        'J02': 60,
        'J07': 61,
        'I07': 62,
        'I10': 63,
        'J10': 64,
        'D15': 65,
        'E15': 66,
        'G15': 67,
        'J15': 68,
        'L15': 69,
        'R15': 70,
        'T15': 71,
        'W15': 72,
        'I08': 73,
        'I03': 74,
        'J08': 75,
        'I01': 76,
        'I02': 77,
        'J01': 78,
        'K01': 79,
        'K02': 80,
        'L01': 81,
        'L02': 82,
        'M01': 83,
        'M02': 84,
        'N01': 85,
        'N02': 86,
        'O01': 87,
        'I09': 88,
        'D14': 89,
        'D13': 90,
        'K07': 91,
        'K08': 92,
        'N15': 93,
        'P15': 94,
        'I15': 95,
        'S15': 96,
        'U15': 97,
        'V15': 98,
        'S07': 99,
        'S08': 100,
        'L09': 101,
        'L08': 102,
        'Q02': 103,
        'Q01': 104
    }
    df_full['location'] = df_full['location'].map(location_map)
    df_train = df_full.sample(frac=0.8, random_state=200)
    df_valid = df_full.drop(df_train.index)

    location_counts = BLE_RSSI.location.value_counts()
    x1 = np.asarray(df_train[FEATURES])
    y1 = np.asarray(df_train['location'])

    x2 = np.asarray(df_valid[FEATURES])
    y2 = np.asarray(df_valid['location'])

    def formatFeatures(features):
        formattedFeatures = {}
        numColumns = features.shape[1]

        for i in range(0, numColumns):
            formattedFeatures["b" + str(3001 + i)] = features[:, i]

        return formattedFeatures

    trainingFeatures = formatFeatures(x1)
    trainingCategories = y1

    testFeatures = formatFeatures(x2)
    testCategories = y2

    # Train Input Function
    def train_input_fn():
        dataset = tf.data.Dataset.from_tensor_slices((trainingFeatures, y1))
        dataset = dataset.repeat(args.epochs).batch(args.tf_batch_size)
        return dataset

    # Test Input Function
    def eval_input_fn():
        dataset = tf.data.Dataset.from_tensor_slices((testFeatures, y2))
        return dataset.repeat(args.epochs).batch(args.tf_batch_size)

    # Provide list of GPUs should be used to train the model

    distribution = tf.distribute.experimental.ParameterServerStrategy()
    print('Number of devices: {}'.format(distribution.num_replicas_in_sync))

    # Configuration of  training model

    config = tf.estimator.RunConfig(train_distribute=distribution,
                                    model_dir=args.tf_model_dir,
                                    save_summary_steps=100,
                                    save_checkpoints_steps=100)

    # Build 3 layer DNN classifier

    model = tf.estimator.DNNClassifier(hidden_units=[13, 65, 110],
                                       feature_columns=feature_columns,
                                       optimizer=tf.train.AdamOptimizer(
                                           learning_rate=args.learning_rate,
                                           beta1=args.beta1,
                                           beta2=args.beta2),
                                       model_dir=args.tf_model_dir,
                                       n_classes=105,
                                       config=config)

    export_final = tf.estimator.FinalExporter(
        args.tf_export_dir,
        serving_input_receiver_fn=serving_input_receiver_fn)

    train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                        max_steps=args.tf_train_steps)

    eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn,
                                      steps=100,
                                      exporters=export_final,
                                      throttle_secs=1,
                                      start_delay_secs=1)

    # Train and Evaluate the model

    tf.estimator.train_and_evaluate(model, train_spec, eval_spec)

    MODEL_EXPORT_PATH = args.tf_model_dir

    def predict(request):
        """ 
        Define custom predict function to be used by local prediction
        and explainer. Set anchor_tabular predict function so it always returns predicted class
        """
        # Get model exporter path
        for dir in os.listdir(args.tf_model_dir):
            if re.match('[0-9]', dir):
                exported_path = os.path.join(args.tf_model_dir, dir)
                break
        else:
            raise Exception("Model path not found")

        # Prepare model input data
        feature_cols = [
            "b3001", "b3002", "b3003", "b3004", "b3005", "b3006", "b3007",
            "b3008", "b3009", "b3010", "b3011", "b3012", "b3013"
        ]
        input = {
            'b3001': [],
            'b3002': [],
            'b3003': [],
            'b3004': [],
            'b3005': [],
            'b3006': [],
            'b3007': [],
            'b3008': [],
            'b3009': [],
            'b3010': [],
            'b3011': [],
            'b3012': [],
            'b3013': []
        }

        X = request
        if np.ndim(X) != 2:
            for i in range(len(X)):
                input[feature_cols[i]].append(X[i])
        else:
            for i in range(len(X)):
                for j in range(len(X[i])):
                    input[feature_cols[j]].append(X[i][j])

        # Open a Session to predict
        with tf.Session() as sess:
            tf.saved_model.loader.load(sess,
                                       [tf.saved_model.tag_constants.SERVING],
                                       exported_path)
            predictor = tf.contrib.predictor.from_saved_model(
                exported_path, signature_def_key='predict')
            output_dict = predictor(input)
        sess.close()
        output = {}
        output["predictions"] = {
            "probabilities": output_dict["probabilities"].tolist()
        }
        return np.asarray(output['predictions']["probabilities"])

    #Initialize and fit
    feature_cols = [
        "b3001", "b3002", "b3003", "b3004", "b3005", "b3006", "b3007", "b3008",
        "b3009", "b3010", "b3011", "b3012", "b3013"
    ]
    explainer = AnchorTabular(predict, feature_cols)

    explainer.fit(x1, disc_perc=(25, 50, 75))

    #Save Explainer file
    #Save explainer file with .dill extension. It will be used when creating the InferenceService
    if not os.path.exists(args.explainer_dir):
        os.mkdir(args.explainer_dir)
    with open("%s/explainer.dill" % args.explainer_dir, 'wb') as f:
        dill.dump(explainer, f)