def preparation_cam(self, data_):
        """Run the model once on ``data_`` and gather the inputs CAM needs.

        The input is loaded with ``read_image`` and passed to
        ``self.predict_fn``. The first entry of the first output is taken as
        the class scores and the second output as the feature maps. Only the
        top-1 class is kept for interpretation.

        Side effects: sets ``self.predicted_label``,
        ``self.predicted_probability``, ``self.image`` and ``self.labels``,
        and logs the predicted class.

        Args:
            data_: Whatever ``read_image`` accepts.

        Returns:
            A ``(feature_maps, fc_weights)`` tuple, where ``fc_weights`` comes
            from ``paddle_get_fc_weights()``.
        """
        image_show = read_image(data_)
        outputs = self.predict_fn(image_show)

        scores = outputs[0][0]
        # Scores that do not already sum to one are treated as logits and
        # pushed through a max-shifted softmax; otherwise they are used as-is.
        needs_softmax = abs(np.sum(scores) - 1.0) > 1e-4
        if needs_softmax:
            shifted = scores - np.max(scores)
            exponentials = np.exp(shifted)
            probability = exponentials / np.sum(exponentials)
        else:
            probability = scores

        # Only the top-1 class is interpreted; keep it as a length-1 slice.
        top1 = np.argsort(probability)[-1:]
        best = top1[0]

        self.predicted_label = best
        self.predicted_probability = probability[best]
        self.image = image_show[0]
        self.labels = top1

        fc_weights = paddle_get_fc_weights()
        feature_maps = outputs[1]

        # Prefer a human-readable class name when one is available.
        if self.label_names is None:
            shown_label = best
        else:
            shown_label = self.label_names[best]

        formatted_probability = "%.3f" % (probability[best])
        message = "predicted result: {} with probability {}.".format(
            shown_label, formatted_probability)
        logging.info(message)
        return feature_maps, fc_weights
Пример #2
0
    def preparation_lime(self, data_):
        """Predict on ``data_`` and fit a LIME interpreter for the top-1 class.

        The input is loaded with ``read_image`` and scored with
        ``self.predict_fn``; the first row of the output is used as the scores
        of the single image.

        Side effects: sets ``self.predicted_label``,
        ``self.predicted_probability``, ``self.image``, ``self.labels`` and
        ``self.lime_interpreter``, and logs the prediction and the time spent
        in LIME.

        Args:
            data_: Whatever ``read_image`` accepts.
        """
        image_show = read_image(data_)
        scores = self.predict_fn(image_show)[0]  # only one image here.

        # Scores that do not already sum to one are treated as logits and
        # pushed through a max-shifted softmax; otherwise they are used as-is.
        needs_softmax = abs(np.sum(scores) - 1.0) > 1e-4
        if needs_softmax:
            shifted = scores - np.max(scores)
            exponentials = np.exp(shifted)
            probability = exponentials / np.sum(exponentials)
        else:
            probability = scores

        # Only the top-1 class is interpreted; keep it as a length-1 slice.
        top1 = np.argsort(probability)[-1:]
        best = top1[0]

        self.predicted_label = best
        self.predicted_probability = probability[best]
        self.image = image_show[0]
        self.labels = top1

        # Prefer a human-readable class name when one is available.
        if self.label_names is None:
            shown_label = best
        else:
            shown_label = self.label_names[best]

        formatted_probability = "%.3f" % (probability[best])
        message = "predicted result: {} with probability {}.".format(
            shown_label, formatted_probability)
        logging.info(message)

        started_at = time.time()
        lime_algo = lime_base.LimeImageInterpreter()
        self.lime_interpreter = lime_algo.interpret_instance(
            self.image,
            self.predict_fn,
            self.labels,
            0,
            num_samples=self.num_samples,
            batch_size=self.batch_size)
        elapsed = time.time() - started_at
        logging.info('lime time: ' + str(elapsed) + 's.')
    def preparation_normlime(self, data_):
        """Run LIME on ``data_`` and derive the NormLIME weights.

        A fresh ``LIME`` helper is built from this object's ``predict_fn``,
        ``label_names``, ``num_samples`` and ``batch_size`` and stored as
        ``self._lime``. Its prediction results are then copied onto ``self``
        (``predicted_label``, ``predicted_probability``, ``labels``), and
        ``self.image`` is set from a separate ``read_image`` call.

        Args:
            data_: Whatever ``read_image`` accepts.

        Returns:
            The value of ``self.predict_using_normlime_weights`` for the
            predicted labels and the cluster labels of the LIME segments.
        """
        lime_helper = LIME(self.predict_fn, self.label_names,
                           self.num_samples, self.batch_size)
        self._lime = lime_helper
        lime_helper.preparation_lime(data_)

        image_show = read_image(data_)

        # Mirror the LIME prediction results on this object.
        self.predicted_label = lime_helper.predicted_label
        self.predicted_probability = lime_helper.predicted_probability
        self.image = image_show[0]
        self.labels = lime_helper.labels
        logging.info('performing NormLIME operations ...')

        kmeans_features = compute_features_for_kmeans(image_show)
        channels_last = kmeans_features.transpose((1, 2, 0))
        lime_segments = lime_helper.lime_interpreter.segments
        cluster_labels = self.predict_cluster_labels(channels_last,
                                                     lime_segments)

        return self.predict_using_normlime_weights(self.labels,
                                                   cluster_labels)
Пример #4
0
def _uniform_normlime_weights(num_features):
    """Return ``{feature_index: 1 / num_features}`` for every feature."""
    w = np.ones((num_features)) / num_features
    return dict(enumerate(w))


def _sharpened_softmax_weights(coefficients):
    """Map coefficients to ``{feature_index: weight}`` via softmax(10 * (w - max(w)))."""
    w = coefficients - np.max(coefficients)
    exp_w = np.exp(w * 10)
    w = exp_w / np.sum(exp_w)
    return dict(enumerate(w))


def _block_grid_segments(height, width, num_blocks=10):
    """Build an int32 ``(height, width)`` map labelling a regular block grid.

    Pixel ``(i, j)`` gets the label
    ``i // block_height * num_blocks + j // block_width``.
    """
    height_per_block = height // num_blocks + 1
    width_per_block = width // num_blocks + 1
    row_part = np.arange(height) // height_per_block * num_blocks
    col_part = np.arange(width) // width_per_block
    # Broadcasting a column of row offsets against a row of column offsets
    # gives the same values as filling the map pixel by pixel.
    return (row_part[:, np.newaxis] + col_part[np.newaxis, :]).astype(
        np.int32)


def precompute_global_classifier(dataset,
                                 predict_fn,
                                 save_path,
                                 batch_size=50,
                                 max_num_samples=1000):
    """Fit the global NormLIME classifier and save its per-class weights.

    Every sample in ``dataset.file_list`` is cut into a regular 10x10 block
    grid, each block is assigned to a k-means cluster, and the sample becomes
    a binary vector marking which clusters occur. A logistic regression is
    then fitted to predict the model's own arg-max label from that vector.
    Its coefficients, passed through a sharpened softmax, become the
    per-class cluster weights.

    The pre-trained k-means model is read from ``~/.paddlex/pre_models`` and
    downloaded first if that directory does not exist.

    Args:
        dataset: Object with a ``file_list`` attribute; the first element of
            each item is passed to ``read_image``.
        predict_fn: Callable applied to the loaded image; the first row of its
            output is used as the scores of the single image.
        save_path: Destination handed to ``np.save``.
        batch_size: Not used by this function; kept for interface
            compatibility.
        max_num_samples: Not used by this function; kept for interface
            compatibility.

    Returns:
        ``save_path``, after a ``{class_index: {cluster_index: weight}}``
        dictionary has been written with ``np.save``.

    Raises:
        ValueError: If ``dataset.file_list`` is empty.
    """
    from sklearn.linear_model import LogisticRegression

    limited_samples_warning = (
        "Warning: The test samples in the dataset is limited.\n"
        "NormLIME may have no effect on the results.\n"
        "Try to add more test samples, or see the results of LIME.")

    root_path = gen_user_home()
    root_path = osp.join(root_path, '.paddlex')
    h_pre_models = osp.join(root_path, "pre_models")
    if not osp.exists(h_pre_models):
        if not osp.exists(root_path):
            os.makedirs(root_path)
        url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
        pdx.utils.download_and_decompress(url, path=root_path)
    h_pre_models_kmeans = osp.join(h_pre_models, "kmeans_model.pkl")
    kmeans_model = load_kmeans_model(h_pre_models_kmeans)

    image_list = [item[0] for item in dataset.file_list]
    if not image_list:
        # Without this guard the failure surfaces later as NumPy's opaque
        # "zero-size array to reduction operation" ValueError.
        raise ValueError(
            "Cannot precompute the NormLIME classifier: "
            "dataset.file_list is empty.")

    x_data = []
    y_labels = []

    num_features = len(kmeans_model.cluster_centers_)

    logging.info(
        "Initialization for NormLIME: Computing each sample in the test list.",
        use_color=True)

    for each_data_ in tqdm.tqdm(image_list):
        image_show = read_image(each_data_)
        result = predict_fn(image_show)
        result = result[0]  # only one image here.
        features = compute_features_for_kmeans(image_show).transpose(
            (1, 2, 0))

        # A fixed block grid stands in for a real segmentation here.
        segments = _block_grid_segments(image_show.shape[1],
                                        image_show.shape[2])
        X = get_feature_for_kmeans(features, segments)

        try:
            cluster_labels = kmeans_model.predict(X)
        except AttributeError:
            # Fallback for k-means objects whose predict() fails with
            # AttributeError: assign each row to its nearest cluster centre.
            from sklearn.metrics import pairwise_distances_argmin_min
            cluster_labels, _ = pairwise_distances_argmin_min(
                X, kmeans_model.cluster_centers_)

        # Bag-of-clusters indicator: 1 for every cluster seen in this sample.
        x_data_i = np.zeros((num_features))
        x_data_i[cluster_labels] = 1

        y_labels.append(np.argmax(result))
        x_data.append(x_data_i)

    unique_labels = np.unique(y_labels)
    num_classes = np.max(unique_labels) + 1
    normlime_weights_all_labels = {}

    if len(unique_labels) < 2:
        # A classifier cannot be fitted on a single class, so every class
        # falls back to uniform weights.
        logging.info(limited_samples_warning)
        for class_index in range(num_classes):
            normlime_weights_all_labels[
                class_index] = _uniform_normlime_weights(num_features)
        logging.info(
            "Saving the computed normlime_weights in {}".format(save_path))

        np.save(save_path, normlime_weights_all_labels)
        return save_path

    clf = LogisticRegression(multi_class='multinomial', max_iter=1000)
    clf.fit(x_data, y_labels)

    if len(y_labels) / len(unique_labels) < 3:
        logging.info(limited_samples_warning)

    # With exactly two classes clf.coef_ has shape [1, num_features];
    # otherwise it has one row per entry of clf.classes_.
    is_binary = len(unique_labels) == 2
    for class_index in range(num_classes):
        if class_index not in clf.classes_:
            # Class never predicted on this dataset: no coefficients exist.
            normlime_weights_all_labels[
                class_index] = _uniform_normlime_weights(num_features)
            continue

        if is_binary:
            # The single coefficient row is negated for the first class.
            if clf.classes_[0] == class_index:
                coefficients = -clf.coef_[0]
            else:
                coefficients = clf.coef_[0]
        else:
            coef_class_index = np.where(clf.classes_ == class_index)[0][0]
            coefficients = clf.coef_[coef_class_index]

        normlime_weights_all_labels[
            class_index] = _sharpened_softmax_weights(coefficients)

    logging.info(
        "Saving the computed normlime_weights in {}".format(save_path))
    np.save(save_path, normlime_weights_all_labels)

    return save_path
Пример #5
0
def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size,
                            save_dir):
    """Compute LIME weights for each sample and save them next to its cluster labels.

    For every item in ``list_data_`` the function predicts the class scores,
    keeps up to five top classes whose probability is at least 0.05 (always at
    least one), runs LIME on them, and assigns each LIME segment to a k-means
    cluster. The result is written through
    ``save_one_lime_predict_and_kmean_labels``. Items whose output file
    already exists are skipped.

    The pre-trained k-means model is read from ``~/.paddlex/pre_models`` and
    downloaded first if that directory does not exist.

    Args:
        list_data_: Sized, iterable collection of inputs accepted by
            ``read_image``. String items are treated as '/'-separated paths,
            and the part of the file name before its first dot names the
            output file; other items are named by their index.
        predict_fn: Callable applied to the loaded image; the first row of its
            output is used as the scores of the single image.
        num_samples: Forwarded to LIME and embedded in the output file name.
        batch_size: Forwarded to LIME.
        save_dir: Directory in which the ``lime_weights_s*_*.npy`` files are
            written.
    """
    root_path = gen_user_home()
    root_path = osp.join(root_path, '.paddlex')
    h_pre_models = osp.join(root_path, "pre_models")
    if not osp.exists(h_pre_models):
        if not osp.exists(root_path):
            os.makedirs(root_path)
        url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
        pdx.utils.download_and_decompress(url, path=root_path)
    h_pre_models_kmeans = osp.join(h_pre_models, "kmeans_model.pkl")
    kmeans_model = load_kmeans_model(h_pre_models_kmeans)

    # Selection rule for the classes to interpret.
    threshold = 0.05
    max_top_k = 5

    for data_index, each_data_ in enumerate(list_data_):
        is_path = isinstance(each_data_, str)
        if is_path:
            sample_name = each_data_.split('/')[-1].split('.')[0]
        else:
            sample_name = data_index
        save_path = os.path.join(
            save_dir, "lime_weights_s{}_{}.npy".format(num_samples,
                                                       sample_name))

        if os.path.exists(save_path):
            logging.info(save_path + ' exists, not computing this one.',
                         use_color=True)
            continue

        # Formatting (not "+") so that non-string items, which are shown by
        # their integer index, do not raise TypeError.
        img_file_name = each_data_ if is_path else data_index
        logging.info('processing {} [{}/{}]'.format(
            img_file_name, data_index, len(list_data_)),
                     use_color=True)

        image_show = read_image(each_data_)
        result = predict_fn(image_show)
        result = result[0]  # only one image here.

        if abs(np.sum(result) - 1.0) > 1e-4:
            # Softmax, shifted by the maximum so np.exp cannot overflow on
            # large logits; the resulting probabilities are unchanged.
            exp_result = np.exp(result - np.max(result))
            probability = exp_result / np.sum(exp_result)
        else:
            probability = result

        # Class indices ordered from most to least probable.
        pred_label = np.argsort(probability)[::-1]

        # Count leading classes above the threshold, capped at max_top_k.
        top_k = 0
        for label in pred_label:
            if probability[label] < threshold or top_k == max_top_k:
                break
            top_k += 1

        # Always interpret at least the top-1 class.
        if top_k == 0:
            top_k = 1

        pred_label = pred_label[:top_k]

        algo = lime_base.LimeImageInterpreter()
        interpreter = algo.interpret_instance(image_show[0],
                                              predict_fn,
                                              pred_label,
                                              0,
                                              num_samples=num_samples,
                                              batch_size=batch_size)

        X = get_feature_for_kmeans(
            compute_features_for_kmeans(image_show).transpose((1, 2, 0)),
            interpreter.segments)
        try:
            cluster_labels = kmeans_model.predict(X)
        except AttributeError:
            # Fallback for k-means objects whose predict() fails with
            # AttributeError: assign each row to its nearest cluster centre.
            from sklearn.metrics import pairwise_distances_argmin_min
            cluster_labels, _ = pairwise_distances_argmin_min(
                X, kmeans_model.cluster_centers_)
        save_one_lime_predict_and_kmean_labels(interpreter.local_weights,
                                               pred_label, cluster_labels,
                                               save_path)