Пример #1
0
    def __init__(self, name, values, scale, name_offset, offset, labels=None):
        super().__init__()

        # leading label
        font = name.document().defaultFont()
        if self.bold_label:
            font.setWeight(QFont.Bold)
        name.setFont(font)
        name.setPos(name_offset, -10)
        name.setParentItem(self)

        # prediction marker
        self.dot = self.DOT_ITEM_CLS(self.dot_r, scale, offset, values[0],
                                     values[-1])
        self.dot.setParentItem(self)

        # line
        line = QGraphicsLineItem(
            min(values) * scale + offset, 0,
            max(values) * scale + offset, 0)
        line.setParentItem(self)

        if labels is None:
            labels = [str(abs(v) if v == -0 else v) for v in values]

        old_x_tick = None
        shown_items = []
        w = QGraphicsSimpleTextItem(labels[0]).boundingRect().width()
        text_finish = values[0] * scale - w + offset - 10
        for i, (label, value) in enumerate(zip(labels, values)):
            text = QGraphicsSimpleTextItem(label)
            x_text = value * scale - text.boundingRect().width() / 2 + offset
            if text_finish > x_text - 10:
                y_text, y_tick = self.dot_r * 0.7, 0
                text_finish = values[0] * scale + offset
            else:
                y_text = -text.boundingRect().height() - self.dot_r * 0.7
                y_tick = -self.tick_height
                text_finish = x_text + text.boundingRect().width()
            text.setPos(x_text, y_text)
            if not collides(text, shown_items):
                text.setParentItem(self)
                shown_items.append(text)

            x_tick = value * scale - self.tick_width / 2 + offset
            tick = QGraphicsRectItem(x_tick, y_tick, self.tick_width,
                                     self.tick_height)
            tick.setBrush(QColor(Qt.black))
            tick.setParentItem(self)

            if self.half_tick_height and i:
                x = x_tick - (x_tick - old_x_tick) / 2
                half_tick = QGraphicsLineItem(x, -self.half_tick_height, x, 0)
                half_tick.setParentItem(self)
            old_x_tick = x_tick
Пример #2
0
    def __init__(self, name, values, scale, name_offset, offset, get_points,
                 title, get_probabilities):
        super().__init__()
        self.scale = scale
        self.offset = offset
        self.get_points = get_points
        self.min_val = min(values)
        self.max_val = max(values)

        # leading labels
        font = name.document().defaultFont()
        font.setWeight(QFont.Bold)
        name_total = QGraphicsTextItem("Total")
        name_total.setFont(font)
        name_total.setPos(name_offset, -25)
        name_total.setParentItem(self)
        name.setFont(font)
        name.setPos(name_offset, 10)
        name.setParentItem(self)

        # prediction marker
        self.dot = ProbabilitiesDotItem(
            self.dot_r, scale, offset, values[0], values[-1],
            title, get_probabilities)
        self.dot.setPos(0, (- self.dot_r + self.y_diff) / 2)
        self.dot.setParentItem(self)

        # two lines
        t_line = QGraphicsLineItem(self.min_val * scale + offset, 0,
                                   self.max_val * scale + offset, 0)
        p_line = QGraphicsLineItem(self.min_val * scale + offset, self.y_diff,
                                   self.max_val * scale + offset, self.y_diff)
        t_line.setParentItem(self)
        p_line.setParentItem(self)

        # ticks and labels
        old_x_tick = values[0] * scale + offset
        for i, value in enumerate(values[1:]):
            x_tick = value * scale + offset
            x = x_tick - (x_tick - old_x_tick) / 2
            half_tick = QGraphicsLineItem(x, - self.tick_height / 2, x, 0)
            half_tick.setParentItem(self)
            old_x_tick = x_tick
            if i == len(values) - 2:
                break
            text = QGraphicsTextItem(str(abs(value) if value == -0 else value))
            x_text = value * scale - text.boundingRect().width() / 2 + offset
            y_text = - text.boundingRect().height() - self.dot_r * 0.7
            text.setPos(x_text, y_text)
            text.setParentItem(self)
            tick = QGraphicsLineItem(x_tick, -self.tick_height, x_tick, 0)
            tick.setParentItem(self)

        self.prob_items = [
            (i / 10, QGraphicsTextItem(" " + str(i * 10) + " "),
             QGraphicsLineItem(0, 0, 0, 0)) for i in range(1, 10)]
Пример #3
0
class OWNomogram(OWWidget):
    name = "Nomogram"
    description = " Nomograms for Visualization of Naive Bayesian" \
                  " and Logistic Regression Classifiers."
    icon = "icons/Nomogram.svg"
    priority = 2000

    inputs = [("Classifier", Model, "set_classifier"),
              ("Data", Table, "set_instance")]

    MAX_N_ATTRS = 1000
    POINT_SCALE = 0
    ALIGN_LEFT = 0
    ALIGN_ZERO = 1
    ACCEPTABLE = (NaiveBayesModel, LogisticRegressionClassifier)
    settingsHandler = ClassValuesContextHandler()
    target_class_index = ContextSetting(0)
    normalize_probabilities = Setting(False)
    scale = Setting(1)
    display_index = Setting(1)
    n_attributes = Setting(10)
    sort_index = Setting(SortBy.ABSOLUTE)
    cont_feature_dim_index = Setting(0)

    graph_name = "scene"

    class Error(OWWidget.Error):
        invalid_classifier = Msg("Nomogram accepts only Naive Bayes and "
                                 "Logistic Regression classifiers.")

    def __init__(self):
        super().__init__()
        self.instances = None
        self.domain = None
        self.data = None
        self.classifier = None
        self.align = OWNomogram.ALIGN_ZERO
        self.log_odds_ratios = []
        self.log_reg_coeffs = []
        self.log_reg_coeffs_orig = []
        self.log_reg_cont_data_extremes = []
        self.p = None
        self.b0 = None
        self.points = []
        self.feature_items = []
        self.feature_marker_values = []
        self.scale_back = lambda x: x
        self.scale_forth = lambda x: x
        self.nomogram = None
        self.nomogram_main = None
        self.vertical_line = None
        self.hidden_vertical_line = None
        self.old_target_class_index = self.target_class_index
        self.markers_set = False
        self.repaint = False

        # GUI
        box = gui.vBox(self.controlArea, "Target class")
        self.class_combo = gui.comboBox(box,
                                        self,
                                        "target_class_index",
                                        callback=self._class_combo_changed,
                                        contentsLength=12)
        self.norm_check = gui.checkBox(
            box,
            self,
            "normalize_probabilities",
            "Normalize probabilities",
            hidden=True,
            callback=self._norm_check_changed,
            tooltip="For multiclass data 1 vs. all probabilities do not"
            " sum to 1 and therefore could be normalized.")

        self.scale_radio = gui.radioButtons(
            self.controlArea,
            self,
            "scale", ["Point scale", "Log odds ratios"],
            box="Scale",
            callback=self._radio_button_changed)

        box = gui.vBox(self.controlArea, "Display features")
        grid = QGridLayout()
        self.display_radio = gui.radioButtonsInBox(
            box,
            self,
            "display_index", [],
            orientation=grid,
            callback=self._display_radio_button_changed)
        radio_all = gui.appendRadioButton(self.display_radio,
                                          "All:",
                                          addToLayout=False)
        radio_best = gui.appendRadioButton(self.display_radio,
                                           "Best ranked:",
                                           addToLayout=False)
        spin_box = gui.hBox(None, margin=0)
        self.n_spin = gui.spin(spin_box,
                               self,
                               "n_attributes",
                               1,
                               self.MAX_N_ATTRS,
                               label=" ",
                               controlWidth=60,
                               callback=self._n_spin_changed)
        grid.addWidget(radio_all, 1, 1)
        grid.addWidget(radio_best, 2, 1)
        grid.addWidget(spin_box, 2, 2)

        self.sort_combo = gui.comboBox(box,
                                       self,
                                       "sort_index",
                                       label="Sort by: ",
                                       items=SortBy.items(),
                                       orientation=Qt.Horizontal,
                                       callback=self._sort_combo_changed)

        self.cont_feature_dim_combo = gui.comboBox(
            box,
            self,
            "cont_feature_dim_index",
            label="Continuous features: ",
            items=["1D projection", "2D curve"],
            orientation=Qt.Horizontal,
            callback=self._cont_feature_dim_combo_changed)

        gui.rubber(self.controlArea)

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(
            self.scene,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            renderHints=QPainter.Antialiasing | QPainter.TextAntialiasing
            | QPainter.SmoothPixmapTransform,
            alignment=Qt.AlignLeft)
        self.view.viewport().installEventFilter(self)
        self.view.viewport().setMinimumWidth(300)
        self.view.sizeHint = lambda: QSize(600, 500)
        self.mainArea.layout().addWidget(self.view)

    def _class_combo_changed(self):
        values = [item.dot.value for item in self.feature_items]
        self.feature_marker_values = self.scale_back(values)
        coeffs = [
            np.nan_to_num(p[self.target_class_index] /
                          p[self.old_target_class_index]) for p in self.points
        ]
        points = [p[self.old_target_class_index] for p in self.points]
        self.feature_marker_values = [
            self.get_points_from_coeffs(v, c, p)
            for (v, c, p) in zip(self.feature_marker_values, coeffs, points)
        ]
        self.update_scene()
        self.old_target_class_index = self.target_class_index

    def _norm_check_changed(self):
        values = [item.dot.value for item in self.feature_items]
        self.feature_marker_values = self.scale_back(values)
        self.update_scene()

    def _radio_button_changed(self):
        values = [item.dot.value for item in self.feature_items]
        self.feature_marker_values = self.scale_back(values)
        self.update_scene()

    def _display_radio_button_changed(self):
        self.__hide_attrs(self.n_attributes if self.display_index else None)

    def _n_spin_changed(self):
        self.display_index = 1
        self.__hide_attrs(self.n_attributes)

    def __hide_attrs(self, n_show):
        if self.nomogram_main is None:
            return
        self.nomogram_main.hide(n_show)
        if self.vertical_line:
            x = self.vertical_line.line().x1()
            y = self.nomogram_main.layout.preferredHeight() + 30
            self.vertical_line.setLine(x, -6, x, y)
            self.hidden_vertical_line.setLine(x, -6, x, y)
        rect = QRectF(self.scene.sceneRect().x(),
                      self.scene.sceneRect().y(),
                      self.scene.itemsBoundingRect().width(),
                      self.nomogram.preferredSize().height())
        self.scene.setSceneRect(rect.adjusted(0, 0, 70, 70))

    def _sort_combo_changed(self):
        if self.nomogram_main is None:
            return
        self.nomogram_main.hide(None)
        self.nomogram_main.sort(self.sort_index)
        self.__hide_attrs(self.n_attributes if self.display_index else None)

    def _cont_feature_dim_combo_changed(self):
        values = [item.dot.value for item in self.feature_items]
        self.feature_marker_values = self.scale_back(values)
        self.update_scene()

    def eventFilter(self, obj, event):
        if obj is self.view.viewport() and event.type() == QEvent.Resize:
            self.repaint = True
            values = [item.dot.value for item in self.feature_items]
            self.feature_marker_values = self.scale_back(values)
            self.update_scene()
        return super().eventFilter(obj, event)

    def update_controls(self):
        self.class_combo.clear()
        self.norm_check.setHidden(True)
        self.cont_feature_dim_combo.setEnabled(True)
        if self.domain:
            self.class_combo.addItems(self.domain.class_vars[0].values)
            if len(self.domain.attributes) > self.MAX_N_ATTRS:
                self.display_index = 1
            if len(self.domain.class_vars[0].values) > 2:
                self.norm_check.setHidden(False)
            if not self.domain.has_continuous_attributes():
                self.cont_feature_dim_combo.setEnabled(False)
                self.cont_feature_dim_index = 0
        model = self.sort_combo.model()
        item = model.item(SortBy.POSITIVE)
        item.setFlags(item.flags() | Qt.ItemIsEnabled)
        item = model.item(SortBy.NEGATIVE)
        item.setFlags(item.flags() | Qt.ItemIsEnabled)
        self.align = OWNomogram.ALIGN_ZERO
        if self.classifier and isinstance(self.classifier,
                                          LogisticRegressionClassifier):
            self.align = OWNomogram.ALIGN_LEFT
            item = model.item(SortBy.POSITIVE)
            item.setFlags(item.flags() & ~Qt.ItemIsEnabled)
            item = model.item(SortBy.NEGATIVE)
            item.setFlags(item.flags() & ~Qt.ItemIsEnabled)
            if self.sort_index in (SortBy.POSITIVE, SortBy.POSITIVE):
                self.sort_index = SortBy.NO_SORTING

    def set_instance(self, data):
        self.instances = data
        self.feature_marker_values = []
        self.set_feature_marker_values()

    def set_classifier(self, classifier):
        self.closeContext()
        self.classifier = classifier
        self.Error.clear()
        if self.classifier and not isinstance(self.classifier,
                                              self.ACCEPTABLE):
            self.Error.invalid_classifier()
            self.classifier = None
        self.domain = self.classifier.domain if self.classifier else None
        self.data = None
        self.calculate_log_odds_ratios()
        self.calculate_log_reg_coefficients()
        self.update_controls()
        self.target_class_index = 0
        self.openContext(self.domain and self.domain.class_var)
        self.points = self.log_odds_ratios or self.log_reg_coeffs
        self.feature_marker_values = []
        self.old_target_class_index = self.target_class_index
        self.update_scene()

    def calculate_log_odds_ratios(self):
        self.log_odds_ratios = []
        self.p = None
        if self.classifier is None or self.domain is None:
            return
        if not isinstance(self.classifier, NaiveBayesModel):
            return

        log_cont_prob = self.classifier.log_cont_prob
        class_prob = self.classifier.class_prob
        for i in range(len(self.domain.attributes)):
            ca = np.exp(log_cont_prob[i]) * class_prob[:, None]
            _or = (ca / (1 - ca)) / (class_prob / (1 - class_prob))[:, None]
            self.log_odds_ratios.append(np.log(_or))
        self.p = class_prob

    def calculate_log_reg_coefficients(self):
        self.log_reg_coeffs = []
        self.log_reg_cont_data_extremes = []
        self.b0 = None
        if self.classifier is None or self.domain is None:
            return
        if not isinstance(self.classifier, LogisticRegressionClassifier):
            return

        self.domain = self.reconstruct_domain(self.classifier.original_domain,
                                              self.domain)
        self.data = self.classifier.original_data.transform(self.domain)
        attrs, ranges, start = self.domain.attributes, [], 0
        for attr in attrs:
            stop = start + len(attr.values) if attr.is_discrete else start + 1
            ranges.append(slice(start, stop))
            start = stop

        self.b0 = self.classifier.intercept
        coeffs = self.classifier.coefficients
        if len(self.domain.class_var.values) == 2:
            self.b0 = np.hstack((self.b0 * (-1), self.b0))
            coeffs = np.vstack((coeffs * (-1), coeffs))
        self.log_reg_coeffs = [coeffs[:, ranges[i]] for i in range(len(attrs))]
        self.log_reg_coeffs_orig = self.log_reg_coeffs.copy()

        min_values = nanmin(self.data.X, axis=0)
        max_values = nanmax(self.data.X, axis=0)

        for i, min_t, max_t in zip(range(len(self.log_reg_coeffs)), min_values,
                                   max_values):
            if self.log_reg_coeffs[i].shape[1] == 1:
                coef = self.log_reg_coeffs[i]
                self.log_reg_coeffs[i] = np.hstack(
                    (coef * min_t, coef * max_t))
                self.log_reg_cont_data_extremes.append(
                    [sorted([min_t, max_t], reverse=(c < 0)) for c in coef])
            else:
                self.log_reg_cont_data_extremes.append([None])

    def update_scene(self):
        if not self.repaint:
            return
        self.clear_scene()
        if self.domain is None or not len(self.points[0]):
            return

        name_items = [
            QGraphicsTextItem(a.name) for a in self.domain.attributes
        ]
        point_text = QGraphicsTextItem("Points")
        probs_text = QGraphicsTextItem("Probabilities (%)")
        all_items = name_items + [point_text, probs_text]
        name_offset = -max(t.boundingRect().width() for t in all_items) - 50
        w = self.view.viewport().rect().width()
        max_width = w + name_offset - 100

        points = [pts[self.target_class_index] for pts in self.points]
        minimums = [min(p) for p in points]
        if self.align == OWNomogram.ALIGN_LEFT:
            points = [p - m for m, p in zip(minimums, points)]
        max_ = np.nan_to_num(max(max(abs(p)) for p in points))
        d = 100 / max_ if max_ else 1
        if self.scale == OWNomogram.POINT_SCALE:
            points = [p * d for p in points]

        if self.scale == OWNomogram.POINT_SCALE and \
                self.align == OWNomogram.ALIGN_LEFT:
            self.scale_back = lambda x: [
                p / d + m for m, p in zip(minimums, x)
            ]
            self.scale_forth = lambda x: [(p - m) * d
                                          for m, p in zip(minimums, x)]
        if self.scale == OWNomogram.POINT_SCALE and \
                self.align != OWNomogram.ALIGN_LEFT:
            self.scale_back = lambda x: [p / d for p in x]
            self.scale_forth = lambda x: [p * d for p in x]
        if self.scale != OWNomogram.POINT_SCALE and \
                self.align == OWNomogram.ALIGN_LEFT:
            self.scale_back = lambda x: [p + m for m, p in zip(minimums, x)]
            self.scale_forth = lambda x: [p - m for m, p in zip(minimums, x)]
        if self.scale != OWNomogram.POINT_SCALE and \
                self.align != OWNomogram.ALIGN_LEFT:
            self.scale_back = lambda x: x
            self.scale_forth = lambda x: x

        point_item, nomogram_head = self.create_main_nomogram(
            name_items, points, max_width, point_text, name_offset)
        probs_item, nomogram_foot = self.create_footer_nomogram(
            probs_text, d, minimums, max_width, name_offset)
        for item in self.feature_items:
            item.dot.point_dot = point_item.dot
            item.dot.probs_dot = probs_item.dot
            item.dot.vertical_line = self.hidden_vertical_line

        self.nomogram = nomogram = NomogramItem()
        nomogram.add_items([nomogram_head, self.nomogram_main, nomogram_foot])
        self.scene.addItem(nomogram)
        self.set_feature_marker_values()
        rect = QRectF(self.scene.itemsBoundingRect().x(),
                      self.scene.itemsBoundingRect().y(),
                      self.scene.itemsBoundingRect().width(),
                      self.nomogram.preferredSize().height())
        self.scene.setSceneRect(rect.adjusted(0, 0, 70, 70))

    def create_main_nomogram(self, name_items, points, max_width, point_text,
                             name_offset):
        cls_index = self.target_class_index
        min_p = min(min(p) for p in points)
        max_p = max(max(p) for p in points)
        values = self.get_ruler_values(min_p, max_p, max_width)
        min_p, max_p = min(values), max(values)
        diff_ = np.nan_to_num(max_p - min_p)
        scale_x = max_width / diff_ if diff_ else max_width

        nomogram_header = NomogramItem()
        point_item = RulerItem(point_text, values, scale_x, name_offset,
                               -scale_x * min_p)
        point_item.setPreferredSize(point_item.preferredWidth(), 35)
        nomogram_header.add_items([point_item])

        self.nomogram_main = SortableNomogramItem()
        cont_feature_item_class = ContinuousFeature2DItem if \
            self.cont_feature_dim_index else ContinuousFeatureItem
        self.feature_items = [
            DiscreteFeatureItem(name_items[i], [val for val in att.values],
                                points[i], scale_x, name_offset, -scale_x *
                                min_p, self.points[i][cls_index])
            if att.is_discrete else cont_feature_item_class(
                name_items[i], self.log_reg_cont_data_extremes[i][cls_index],
                self.get_ruler_values(
                    np.min(points[i]), np.max(points[i]),
                    scale_x * (np.max(points[i]) - np.min(points[i])),
                    False), scale_x, name_offset, -scale_x *
                min_p, self.log_reg_coeffs_orig[i][cls_index][0])
            for i, att in enumerate(self.domain.attributes)
        ]
        self.nomogram_main.add_items(
            self.feature_items, self.sort_index,
            self.n_attributes if self.display_index else None)

        x = -scale_x * min_p
        y = self.nomogram_main.layout.preferredHeight() + 30
        self.vertical_line = QGraphicsLineItem(x, -6, x, y)
        self.vertical_line.setPen(QPen(Qt.DotLine))
        self.vertical_line.setParentItem(point_item)
        self.hidden_vertical_line = QGraphicsLineItem(x, -6, x, y)
        pen = QPen(Qt.DashLine)
        pen.setBrush(QColor(Qt.red))
        self.hidden_vertical_line.setPen(pen)
        self.hidden_vertical_line.setParentItem(point_item)

        return point_item, nomogram_header

    def create_footer_nomogram(self, probs_text, d, minimums, max_width,
                               name_offset):
        eps, d_ = 0.05, 1
        k = -np.log(self.p / (1 - self.p)) if self.p is not None else -self.b0
        min_sum = k[self.target_class_index] - np.log((1 - eps) / eps)
        max_sum = k[self.target_class_index] - np.log(eps / (1 - eps))
        if self.align == OWNomogram.ALIGN_LEFT:
            max_sum = max_sum - sum(minimums)
            min_sum = min_sum - sum(minimums)
            for i in range(len(k)):
                k[i] = k[i] - sum(
                    [min(q) for q in [p[i] for p in self.points]])
        if self.scale == OWNomogram.POINT_SCALE:
            min_sum *= d
            max_sum *= d
            d_ = d

        values = self.get_ruler_values(min_sum, max_sum, max_width)
        min_sum, max_sum = min(values), max(values)
        diff_ = np.nan_to_num(max_sum - min_sum)
        scale_x = max_width / diff_ if diff_ else max_width
        cls_var, cls_index = self.domain.class_var, self.target_class_index
        nomogram_footer = NomogramItem()

        def get_normalized_probabilities(val):
            if not self.normalize_probabilities:
                return 1 / (1 + np.exp(k[cls_index] - val / d_))
            totals = self.__get_totals_for_class_values(minimums)
            p_sum = np.sum(1 / (1 + np.exp(k - totals / d_)))
            return 1 / (1 + np.exp(k[cls_index] - val / d_)) / p_sum

        def get_points(prob):
            if not self.normalize_probabilities:
                return (k[cls_index] - np.log(1 / prob - 1)) * d_
            totals = self.__get_totals_for_class_values(minimums)
            p_sum = np.sum(1 / (1 + np.exp(k - totals / d_)))
            return (k[cls_index] - np.log(1 / (prob * p_sum) - 1)) * d_

        self.markers_set = False
        probs_item = ProbabilitiesRulerItem(
            probs_text,
            values,
            scale_x,
            name_offset,
            -scale_x * min_sum,
            get_points=get_points,
            title="{}='{}'".format(cls_var.name, cls_var.values[cls_index]),
            get_probabilities=get_normalized_probabilities)
        self.markers_set = True
        nomogram_footer.add_items([probs_item])
        return probs_item, nomogram_footer

    def __get_totals_for_class_values(self, minimums):
        cls_index = self.target_class_index
        marker_values = [item.dot.value for item in self.feature_items]
        if not self.markers_set:
            marker_values = self.scale_forth(marker_values)
        totals = np.empty(len(self.domain.class_var.values))
        totals[cls_index] = sum(marker_values)
        marker_values = self.scale_back(marker_values)
        for i in range(len(self.domain.class_var.values)):
            if i == cls_index:
                continue
            coeffs = [np.nan_to_num(p[i] / p[cls_index]) for p in self.points]
            points = [p[cls_index] for p in self.points]
            total = sum([
                self.get_points_from_coeffs(v, c, p)
                for (v, c, p) in zip(marker_values, coeffs, points)
            ])
            if self.align == OWNomogram.ALIGN_LEFT:
                points = [p - m for m, p in zip(minimums, points)]
                total -= sum([min(p) for p in [p[i] for p in self.points]])
            d = 100 / max(max(abs(p)) for p in points)
            if self.scale == OWNomogram.POINT_SCALE:
                total *= d
            totals[i] = total
        return totals

    def set_feature_marker_values(self):
        if not (len(self.points) and len(self.feature_items)):
            return
        if not len(self.feature_marker_values):
            self._init_feature_marker_values()
        self.feature_marker_values = self.scale_forth(
            self.feature_marker_values)
        item = self.feature_items[0]
        for i, item in enumerate(self.feature_items):
            item.dot.move_to_val(self.feature_marker_values[i])
        item.dot.probs_dot.move_to_sum()

    def _init_feature_marker_values(self):
        self.feature_marker_values = []
        cls_index = self.target_class_index
        instances = Table(self.domain, self.instances) \
            if self.instances else None
        for i, attr in enumerate(self.domain.attributes):
            value, feature_val = 0, None
            if len(self.log_reg_coeffs):
                if attr.is_discrete:
                    ind, n = unique(self.data.X[:, i], return_counts=True)
                    feature_val = np.nan_to_num(ind[np.argmax(n)])
                else:
                    feature_val = mean(self.data.X[:, i])
            inst_in_dom = instances and attr in instances.domain
            if inst_in_dom and not np.isnan(instances[0][attr]):
                feature_val = instances[0][attr]
            if feature_val is not None:
                value = self.points[i][cls_index][int(feature_val)] \
                    if attr.is_discrete else \
                    self.log_reg_coeffs_orig[i][cls_index][0] * feature_val
            self.feature_marker_values.append(value)

    def clear_scene(self):
        self.feature_items = []
        self.scale_back = lambda x: x
        self.scale_forth = lambda x: x
        self.nomogram = None
        self.nomogram_main = None
        self.vertical_line = None
        self.hidden_vertical_line = None
        self.scene.clear()

    def send_report(self):
        self.report_plot()

    @staticmethod
    def reconstruct_domain(original, preprocessed):
        # abuse dict to make "in" comparisons faster
        attrs = OrderedDict()
        for attr in preprocessed.attributes:
            cv = attr._compute_value.variable._compute_value
            var = cv.variable if cv else original[attr.name]
            if var in attrs:  # the reason for OrderedDict
                continue
            attrs[var] = None  # we only need keys
        attrs = list(attrs.keys())
        return Domain(attrs, original.class_var, original.metas)

    @staticmethod
    def get_ruler_values(start, stop, max_width, round_to_nearest=True):
        if max_width == 0:
            return [0]
        diff = np.nan_to_num((stop - start) / max_width)
        if diff <= 0:
            return [0]
        decimals = int(np.floor(np.log10(diff)))
        if diff > 4 * pow(10, decimals):
            step = 5 * pow(10, decimals + 2)
        elif diff > 2 * pow(10, decimals):
            step = 2 * pow(10, decimals + 2)
        elif diff > 1 * pow(10, decimals):
            step = 1 * pow(10, decimals + 2)
        else:
            step = 5 * pow(10, decimals + 1)
        round_by = int(-np.floor(np.log10(step)))
        r = start % step
        if not round_to_nearest:
            _range = np.arange(start + step, stop + r, step) - r
            start, stop = np.floor(start * 100) / 100, np.ceil(
                stop * 100) / 100
            return np.round(np.hstack((start, _range, stop)), 2)
        return np.round(np.arange(start, stop + r + step, step) - r, round_by)

    @staticmethod
    def get_points_from_coeffs(current_value, coefficients, possible_values):
        if any(np.isnan(possible_values)):
            return 0
        indices = np.argsort(possible_values)
        sorted_values = possible_values[indices]
        sorted_coefficients = coefficients[indices]
        for i, val in enumerate(sorted_values):
            if current_value < val:
                break
        diff = sorted_values[i] - sorted_values[i - 1]
        k = 0 if diff < 1e-6 else (sorted_values[i] - current_value) / \
                                  (sorted_values[i] - sorted_values[i - 1])
        return sorted_coefficients[i - 1] * sorted_values[i - 1] * k + \
               sorted_coefficients[i] * sorted_values[i] * (1 - k)
Пример #4
0
    def __init__(self, name, data_extremes, values, scale, name_offset, offset,
                 coef):
        super().__init__()
        data_start, data_stop = data_extremes[0], data_extremes[1]
        self.name = name.toPlainText()
        self.diff = (data_stop - data_start) * coef
        labels = [
            str(
                np.round(
                    data_start + (data_stop - data_start) * i /
                    (self.n_tck - 1), 1)) for i in range(self.n_tck)
        ]

        # leading label
        font = name.document().defaultFont()
        name.setFont(font)
        name.setPos(name_offset, -10)
        name.setParentItem(self)

        # labels
        ascending = data_start < data_stop
        y_start, y_stop = (self.y_diff, 0) if ascending else (0, self.y_diff)
        for i in range(self.n_tck):
            text = QGraphicsSimpleTextItem(labels[i])
            w = text.boundingRect().width()
            y = y_start + (y_stop - y_start) / (self.n_tck - 1) * i
            text.setPos(-5 - w, y - 8)
            text.setParentItem(self)
            tick = QGraphicsLineItem(-2, y, 2, y)
            tick.setParentItem(self)

        # prediction marker
        self.dot = Continuous2DMovableDotItem(self.dot_r, scale, offset,
                                              values[0], values[-1], y_start,
                                              y_stop)
        self.dot.tooltip_labels = labels
        self.dot.tooltip_values = values
        self.dot.setParentItem(self)
        h_line = QGraphicsLineItem(values[0] * scale + offset, self.y_diff / 2,
                                   values[-1] * scale + offset,
                                   self.y_diff / 2)
        pen = QPen(Qt.DashLine)
        pen.setBrush(QColor(Qt.red))
        h_line.setPen(pen)
        h_line.setParentItem(self)
        self.dot.horizontal_line = h_line

        # line
        line = QGraphicsLineItem(values[0] * scale + offset, y_start,
                                 values[-1] * scale + offset, y_stop)
        line.setParentItem(self)

        # ticks
        for value in values:
            diff_ = np.nan_to_num(values[-1] - values[0])
            k = (value - values[0]) / diff_ if diff_ else 0
            y_tick = (y_stop - y_start) * k + y_start - self.tick_height / 2
            x_tick = value * scale - self.tick_width / 2 + offset
            tick = QGraphicsRectItem(x_tick, y_tick, self.tick_width,
                                     self.tick_height)
            tick.setBrush(QColor(Qt.black))
            tick.setParentItem(self)

        # rect
        rect = QGraphicsRectItem(values[0] * scale + offset,
                                 -self.y_diff * 0.125,
                                 values[-1] * scale + offset,
                                 self.y_diff * 1.25)
        pen = QPen(Qt.DotLine)
        pen.setBrush(QColor(50, 150, 200, 255))
        rect.setPen(pen)
        rect.setParentItem(self)
        self.setPreferredSize(self.preferredWidth(), self.y_diff * 1.5)
Пример #5
0
class OWNomogram(OWWidget):
    name = "Nomogram"
    description = " Nomograms for Visualization of Naive Bayesian" \
                  " and Logistic Regression Classifiers."
    icon = "icons/Nomogram.svg"
    priority = 2000

    class Inputs:
        classifier = Input("Classifier", Model)
        data = Input("Data", Table)

    MAX_N_ATTRS = 1000
    POINT_SCALE = 0
    ALIGN_LEFT = 0
    ALIGN_ZERO = 1
    ACCEPTABLE = (NaiveBayesModel, LogisticRegressionClassifier)
    settingsHandler = ClassValuesContextHandler()
    target_class_index = ContextSetting(0)
    normalize_probabilities = Setting(False)
    scale = Setting(1)
    display_index = Setting(1)
    n_attributes = Setting(10)
    sort_index = Setting(SortBy.ABSOLUTE)
    cont_feature_dim_index = Setting(0)

    graph_name = "scene"

    class Error(OWWidget.Error):
        invalid_classifier = Msg("Nomogram accepts only Naive Bayes and "
                                 "Logistic Regression classifiers.")

    def __init__(self):
        super().__init__()
        self.instances = None
        self.domain = None
        self.data = None
        self.classifier = None
        self.align = OWNomogram.ALIGN_ZERO
        self.log_odds_ratios = []
        self.log_reg_coeffs = []
        self.log_reg_coeffs_orig = []
        self.log_reg_cont_data_extremes = []
        self.p = None
        self.b0 = None
        self.points = []
        self.feature_items = {}
        self.feature_marker_values = []
        self.scale_marker_values = lambda x: x
        self.nomogram_main = None
        self.vertical_line = None
        self.hidden_vertical_line = None
        self.old_target_class_index = self.target_class_index
        self.repaint = False

        # GUI
        box = gui.vBox(self.controlArea, "Target class")
        self.class_combo = gui.comboBox(
            box, self, "target_class_index", callback=self._class_combo_changed,
            contentsLength=12)
        self.norm_check = gui.checkBox(
            box, self, "normalize_probabilities", "Normalize probabilities",
            hidden=True, callback=self.update_scene,
            tooltip="For multiclass data 1 vs. all probabilities do not"
                    " sum to 1 and therefore could be normalized.")

        self.scale_radio = gui.radioButtons(
            self.controlArea, self, "scale", ["Point scale", "Log odds ratios"],
            box="Scale", callback=self.update_scene)

        box = gui.vBox(self.controlArea, "Display features")
        grid = QGridLayout()
        radio_group = gui.radioButtonsInBox(
            box, self, "display_index", [], orientation=grid,
            callback=self.update_scene)
        radio_all = gui.appendRadioButton(
            radio_group, "All", addToLayout=False)
        radio_best = gui.appendRadioButton(
            radio_group, "Best ranked:", addToLayout=False)
        spin_box = gui.hBox(None, margin=0)
        self.n_spin = gui.spin(
            spin_box, self, "n_attributes", 1, self.MAX_N_ATTRS, label=" ",
            controlWidth=60, callback=self._n_spin_changed)
        grid.addWidget(radio_all, 1, 1)
        grid.addWidget(radio_best, 2, 1)
        grid.addWidget(spin_box, 2, 2)

        self.sort_combo = gui.comboBox(
            box, self, "sort_index", label="Rank by:", items=SortBy.items(),
            orientation=Qt.Horizontal, callback=self.update_scene)

        self.cont_feature_dim_combo = gui.comboBox(
            box, self, "cont_feature_dim_index", label="Numeric features: ",
            items=["1D projection", "2D curve"], orientation=Qt.Horizontal,
            callback=self.update_scene)

        gui.rubber(self.controlArea)

        class _GraphicsView(QGraphicsView):
            def __init__(self, scene, parent, **kwargs):
                for k, v in dict(verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
                                 horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
                                 viewportUpdateMode=QGraphicsView.BoundingRectViewportUpdate,
                                 renderHints=(QPainter.Antialiasing |
                                              QPainter.TextAntialiasing |
                                              QPainter.SmoothPixmapTransform),
                                 alignment=(Qt.AlignTop |
                                            Qt.AlignLeft),
                                 sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                                        QSizePolicy.MinimumExpanding)).items():
                    kwargs.setdefault(k, v)

                super().__init__(scene, parent, **kwargs)

        class GraphicsView(_GraphicsView):
            def __init__(self, scene, parent):
                super().__init__(scene, parent,
                                 verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                                 styleSheet='QGraphicsView {background: white}')
                self.viewport().setMinimumWidth(300)  # XXX: This prevents some tests failing
                self._is_resizing = False

            w = self

            def resizeEvent(self, resizeEvent):
                # Recompute main scene on window width change
                if resizeEvent.size().width() != resizeEvent.oldSize().width():
                    self._is_resizing = True
                    self.w.update_scene()
                    self._is_resizing = False
                return super().resizeEvent(resizeEvent)

            def is_resizing(self):
                return self._is_resizing

            def sizeHint(self):
                return QSize(400, 200)

        class FixedSizeGraphicsView(_GraphicsView):
            def __init__(self, scene, parent):
                super().__init__(scene, parent,
                                 sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                                        QSizePolicy.Minimum))

            def sizeHint(self):
                return QSize(400, 85)

        scene = self.scene = QGraphicsScene(self)

        top_view = self.top_view = FixedSizeGraphicsView(scene, self)
        mid_view = self.view = GraphicsView(scene, self)
        bottom_view = self.bottom_view = FixedSizeGraphicsView(scene, self)

        for view in (top_view, mid_view, bottom_view):
            self.mainArea.layout().addWidget(view)

    def _class_combo_changed(self):
        with np.errstate(invalid='ignore'):
            coeffs = [np.nan_to_num(p[self.target_class_index] /
                                    p[self.old_target_class_index])
                      for p in self.points]
        points = [p[self.old_target_class_index] for p in self.points]
        self.feature_marker_values = [
            self.get_points_from_coeffs(v, c, p) for (v, c, p) in
            zip(self.feature_marker_values, coeffs, points)]
        self.feature_marker_values = np.asarray(self.feature_marker_values)
        self.update_scene()
        self.old_target_class_index = self.target_class_index

    def _n_spin_changed(self):
        self.display_index = 1
        self.update_scene()

    def update_controls(self):
        self.class_combo.clear()
        self.norm_check.setHidden(True)
        self.cont_feature_dim_combo.setEnabled(True)
        if self.domain is not None:
            self.class_combo.addItems(self.domain.class_vars[0].values)
            if len(self.domain.attributes) > self.MAX_N_ATTRS:
                self.display_index = 1
            if len(self.domain.class_vars[0].values) > 2:
                self.norm_check.setHidden(False)
            if not self.domain.has_continuous_attributes():
                self.cont_feature_dim_combo.setEnabled(False)
                self.cont_feature_dim_index = 0
        model = self.sort_combo.model()
        item = model.item(SortBy.POSITIVE)
        item.setFlags(item.flags() | Qt.ItemIsEnabled)
        item = model.item(SortBy.NEGATIVE)
        item.setFlags(item.flags() | Qt.ItemIsEnabled)
        self.align = OWNomogram.ALIGN_ZERO
        if self.classifier and isinstance(self.classifier,
                                          LogisticRegressionClassifier):
            self.align = OWNomogram.ALIGN_LEFT

    @Inputs.data
    def set_data(self, data):
        self.instances = data
        self.feature_marker_values = []
        self.set_feature_marker_values()
        self.update_scene()

    @Inputs.classifier
    def set_classifier(self, classifier):
        self.closeContext()
        self.classifier = classifier
        self.Error.clear()
        if self.classifier and not isinstance(self.classifier, self.ACCEPTABLE):
            self.Error.invalid_classifier()
            self.classifier = None
        self.domain = self.classifier.domain if self.classifier else None
        self.data = None
        self.calculate_log_odds_ratios()
        self.calculate_log_reg_coefficients()
        self.update_controls()
        self.target_class_index = 0
        self.openContext(self.domain.class_var if self.domain is not None
                         else None)
        self.points = self.log_odds_ratios or self.log_reg_coeffs
        self.feature_marker_values = []
        self.old_target_class_index = self.target_class_index
        self.update_scene()

    def calculate_log_odds_ratios(self):
        self.log_odds_ratios = []
        self.p = None
        if self.classifier is None or self.domain is None:
            return
        if not isinstance(self.classifier, NaiveBayesModel):
            return

        log_cont_prob = self.classifier.log_cont_prob
        class_prob = self.classifier.class_prob
        for i in range(len(self.domain.attributes)):
            ca = np.exp(log_cont_prob[i]) * class_prob[:, None]
            _or = (ca / (1 - ca)) / (class_prob / (1 - class_prob))[:, None]
            self.log_odds_ratios.append(np.log(_or))
        self.p = class_prob

    def calculate_log_reg_coefficients(self):
        self.log_reg_coeffs = []
        self.log_reg_cont_data_extremes = []
        self.b0 = None
        if self.classifier is None or self.domain is None:
            return
        if not isinstance(self.classifier, LogisticRegressionClassifier):
            return

        self.domain = self.reconstruct_domain(self.classifier.original_domain,
                                              self.domain)
        self.data = self.classifier.original_data.transform(self.domain)
        attrs, ranges, start = self.domain.attributes, [], 0
        for attr in attrs:
            stop = start + len(attr.values) if attr.is_discrete else start + 1
            ranges.append(slice(start, stop))
            start = stop

        self.b0 = self.classifier.intercept
        coeffs = self.classifier.coefficients
        if len(self.domain.class_var.values) == 2:
            self.b0 = np.hstack((self.b0 * (-1), self.b0))
            coeffs = np.vstack((coeffs * (-1), coeffs))
        self.log_reg_coeffs = [coeffs[:, ranges[i]] for i in range(len(attrs))]
        self.log_reg_coeffs_orig = self.log_reg_coeffs.copy()

        min_values = nanmin(self.data.X, axis=0)
        max_values = nanmax(self.data.X, axis=0)

        for i, min_t, max_t in zip(range(len(self.log_reg_coeffs)),
                                   min_values, max_values):
            if self.log_reg_coeffs[i].shape[1] == 1:
                coef = self.log_reg_coeffs[i]
                self.log_reg_coeffs[i] = np.hstack((coef * min_t, coef * max_t))
                self.log_reg_cont_data_extremes.append(
                    [sorted([min_t, max_t], reverse=(c < 0)) for c in coef])
            else:
                self.log_reg_cont_data_extremes.append([None])

    def update_scene(self):
        self.clear_scene()
        if self.domain is None or not len(self.points[0]):
            return

        n_attrs = self.n_attributes if self.display_index else int(1e10)
        attr_inds, attributes = zip(*self.get_ordered_attributes()[:n_attrs])

        name_items = [QGraphicsTextItem(attr.name) for attr in attributes]
        point_text = QGraphicsTextItem("Points")
        probs_text = QGraphicsTextItem("Probabilities (%)")
        all_items = name_items + [point_text, probs_text]
        name_offset = -max(t.boundingRect().width() for t in all_items) - 10
        w = self.view.viewport().rect().width()
        max_width = w + name_offset - 30

        points = [self.points[i][self.target_class_index]
                  for i in attr_inds]
        if self.align == OWNomogram.ALIGN_LEFT:
            points = [p - p.min() for p in points]
        max_ = np.nan_to_num(max(max(abs(p)) for p in points))
        d = 100 / max_ if max_ else 1
        minimums = [p[self.target_class_index].min() for p in self.points]
        if self.scale == OWNomogram.POINT_SCALE:
            points = [p * d for p in points]

            if self.align == OWNomogram.ALIGN_LEFT:
                self.scale_marker_values = lambda x: (x - minimums) * d
            else:
                self.scale_marker_values = lambda x: x * d
        else:
            if self.align == OWNomogram.ALIGN_LEFT:
                self.scale_marker_values = lambda x: x - minimums
            else:
                self.scale_marker_values = lambda x: x

        point_item, nomogram_head = self.create_main_nomogram(
            attributes, attr_inds,
            name_items, points, max_width, point_text, name_offset)
        probs_item, nomogram_foot = self.create_footer_nomogram(
            probs_text, d, minimums, max_width, name_offset)
        for item in self.feature_items.values():
            item.dot.point_dot = point_item.dot
            item.dot.probs_dot = probs_item.dot
            item.dot.vertical_line = self.hidden_vertical_line

        self.nomogram = nomogram = NomogramItem()
        nomogram.add_items([nomogram_head, self.nomogram_main, nomogram_foot])
        self.scene.addItem(nomogram)

        self.set_feature_marker_values()

        rect = QRectF(self.scene.itemsBoundingRect().x(),
                      self.scene.itemsBoundingRect().y(),
                      self.scene.itemsBoundingRect().width(),
                      self.nomogram.preferredSize().height()).adjusted(10, 0, 20, 0)
        self.scene.setSceneRect(rect)

        # Clip top and bottom (60 and 150) parts from the main view
        self.view.setSceneRect(rect.x(), rect.y() + 80, rect.width() - 10, rect.height() - 160)
        self.view.viewport().setMaximumHeight(rect.height() - 160)
        # Clip main part from top/bottom views
        # below point values are imprecise (less/more than required) but this
        # is not a problem due to clipped scene content still being drawn
        self.top_view.setSceneRect(rect.x(), rect.y() + 3, rect.width() - 10, 20)
        self.bottom_view.setSceneRect(rect.x(), rect.height() - 110, rect.width() - 10, 30)

    def create_main_nomogram(self, attributes, attr_inds, name_items, points,
                             max_width, point_text, name_offset):
        cls_index = self.target_class_index
        min_p = min(p.min() for p in points)
        max_p = max(p.max() for p in points)
        values = self.get_ruler_values(min_p, max_p, max_width)
        min_p, max_p = min(values), max(values)
        diff_ = np.nan_to_num(max_p - min_p)
        scale_x = max_width / diff_ if diff_ else max_width

        nomogram_header = NomogramItem()
        point_item = RulerItem(point_text, values, scale_x, name_offset,
                               - scale_x * min_p)
        point_item.setPreferredSize(point_item.preferredWidth(), 35)
        nomogram_header.add_items([point_item])

        self.nomogram_main = NomogramItem()
        cont_feature_item_class = ContinuousFeature2DItem if \
            self.cont_feature_dim_index else ContinuousFeatureItem

        feature_items = [
            DiscreteFeatureItem(
                name_item, attr.values, point,
                scale_x, name_offset, - scale_x * min_p)
            if attr.is_discrete else
            cont_feature_item_class(
                name_item, self.log_reg_cont_data_extremes[i][cls_index],
                self.get_ruler_values(
                    point.min(), point.max(),
                    scale_x * point.ptp(), False),
                scale_x, name_offset, - scale_x * min_p)
            for i, attr, name_item, point in zip(attr_inds, attributes, name_items, points)]

        self.nomogram_main.add_items(feature_items)
        self.feature_items = OrderedDict(sorted(zip(attr_inds, feature_items)))

        x = - scale_x * min_p
        y = self.nomogram_main.layout().preferredHeight() + 10
        self.vertical_line = QGraphicsLineItem(x, -6, x, y)
        self.vertical_line.setPen(QPen(Qt.DotLine))
        self.vertical_line.setParentItem(point_item)
        self.hidden_vertical_line = QGraphicsLineItem(x, -6, x, y)
        pen = QPen(Qt.DashLine)
        pen.setBrush(QColor(Qt.red))
        self.hidden_vertical_line.setPen(pen)
        self.hidden_vertical_line.setParentItem(point_item)

        return point_item, nomogram_header

    def get_ordered_attributes(self):
        """Return (in_domain_index, attr) pairs, ordered by method in SortBy combo"""
        if self.domain is None or not self.domain.attributes:
            return []

        attrs = self.domain.attributes
        sort_by = self.sort_index
        class_value = self.target_class_index

        if sort_by == SortBy.NO_SORTING:
            return list(enumerate(attrs))

        elif sort_by == SortBy.NAME:

            def key(x):
                _, attr = x
                return attr.name.lower()

        elif sort_by == SortBy.ABSOLUTE:

            def key(x):
                i, attr = x
                if attr.is_discrete:
                    ptp = self.points[i][class_value].ptp()
                else:
                    coef = np.abs(self.log_reg_coeffs_orig[i][class_value]).mean()
                    ptp = coef * np.ptp(self.log_reg_cont_data_extremes[i][class_value])
                return -ptp

        elif sort_by == SortBy.POSITIVE:

            def key(x):
                i, attr = x
                max_value = (self.points[i][class_value].max()
                             if attr.is_discrete else
                             np.mean(self.log_reg_cont_data_extremes[i][class_value]))
                return -max_value

        elif sort_by == SortBy.NEGATIVE:

            def key(x):
                i, attr = x
                min_value = (self.points[i][class_value].min()
                             if attr.is_discrete else
                             np.mean(self.log_reg_cont_data_extremes[i][class_value]))
                return min_value

        return sorted(enumerate(attrs), key=key)


    def create_footer_nomogram(self, probs_text, d, minimums,
                               max_width, name_offset):
        eps, d_ = 0.05, 1
        k = - np.log(self.p / (1 - self.p)) if self.p is not None else - self.b0
        min_sum = k[self.target_class_index] - np.log((1 - eps) / eps)
        max_sum = k[self.target_class_index] - np.log(eps / (1 - eps))
        if self.align == OWNomogram.ALIGN_LEFT:
            max_sum = max_sum - sum(minimums)
            min_sum = min_sum - sum(minimums)
            for i in range(len(k)):
                k[i] = k[i] - sum([min(q) for q in [p[i] for p in self.points]])
        if self.scale == OWNomogram.POINT_SCALE:
            min_sum *= d
            max_sum *= d
            d_ = d

        values = self.get_ruler_values(min_sum, max_sum, max_width)
        min_sum, max_sum = min(values), max(values)
        diff_ = np.nan_to_num(max_sum - min_sum)
        scale_x = max_width / diff_ if diff_ else max_width
        cls_var, cls_index = self.domain.class_var, self.target_class_index
        nomogram_footer = NomogramItem()

        def get_normalized_probabilities(val):
            if not self.normalize_probabilities:
                return 1 / (1 + np.exp(k[cls_index] - val / d_))
            totals = self.__get_totals_for_class_values(minimums)
            p_sum = np.sum(1 / (1 + np.exp(k - totals / d_)))
            return 1 / (1 + np.exp(k[cls_index] - val / d_)) / p_sum

        def get_points(prob):
            if not self.normalize_probabilities:
                return (k[cls_index] - np.log(1 / prob - 1)) * d_
            totals = self.__get_totals_for_class_values(minimums)
            p_sum = np.sum(1 / (1 + np.exp(k - totals / d_)))
            return (k[cls_index] - np.log(1 / (prob * p_sum) - 1)) * d_

        probs_item = ProbabilitiesRulerItem(
            probs_text, values, scale_x, name_offset, - scale_x * min_sum,
            get_points=get_points,
            title="{}='{}'".format(cls_var.name, cls_var.values[cls_index]),
            get_probabilities=get_normalized_probabilities)
        nomogram_footer.add_items([probs_item])
        return probs_item, nomogram_footer

    def __get_totals_for_class_values(self, minimums):
        cls_index = self.target_class_index
        marker_values = self.scale_marker_values(self.feature_marker_values)
        totals = np.full(len(self.domain.class_var.values), np.nan)
        totals[cls_index] = marker_values.sum()
        for i in range(len(self.domain.class_var.values)):
            if i == cls_index:
                continue
            coeffs = [np.nan_to_num(p[i] / p[cls_index]) for p in self.points]
            points = [p[cls_index] for p in self.points]
            total = sum([self.get_points_from_coeffs(v, c, p) for (v, c, p)
                         in zip(self.feature_marker_values, coeffs, points)])
            if self.align == OWNomogram.ALIGN_LEFT:
                points = [p - m for m, p in zip(minimums, points)]
                total -= sum([min(p) for p in [p[i] for p in self.points]])
            d = 100 / max(max(abs(p)) for p in points)
            if self.scale == OWNomogram.POINT_SCALE:
                total *= d
            totals[i] = total
        assert not np.any(np.isnan(totals))
        return totals

    def set_feature_marker_values(self):
        if not (len(self.points) and len(self.feature_items)):
            return
        if not len(self.feature_marker_values):
            self._init_feature_marker_values()
        marker_values = self.scale_marker_values(self.feature_marker_values)

        invisible_sum = 0
        for i in range(len(marker_values)):
            try:
                item = self.feature_items[i]
            except KeyError:
                invisible_sum += marker_values[i]
            else:
                item.dot.move_to_val(marker_values[i])

        item.dot.probs_dot.move_to_sum(invisible_sum)

    def _init_feature_marker_values(self):
        self.feature_marker_values = []
        cls_index = self.target_class_index
        instances = Table(self.domain, self.instances) \
            if self.instances else None
        values = []
        for i, attr in enumerate(self.domain.attributes):
            value, feature_val = 0, None
            if len(self.log_reg_coeffs):
                if attr.is_discrete:
                    ind, n = unique(self.data.X[:, i], return_counts=True)
                    feature_val = np.nan_to_num(ind[np.argmax(n)])
                else:
                    feature_val = nanmean(self.data.X[:, i])

            # If data is provided on a separate signal, use the first data
            # instance to position the points instead of the mean
            inst_in_dom = instances and attr in instances.domain
            if inst_in_dom and not np.isnan(instances[0][attr]):
                feature_val = instances[0][attr]

            if feature_val is not None:
                value = (self.points[i][cls_index][int(feature_val)]
                         if attr.is_discrete else
                         self.log_reg_coeffs_orig[i][cls_index][0] * feature_val)
            values.append(value)
        self.feature_marker_values = np.asarray(values)

    def clear_scene(self):
        self.feature_items = {}
        self.scale_marker_values = lambda x: x
        self.nomogram = None
        self.nomogram_main = None
        self.vertical_line = None
        self.hidden_vertical_line = None
        self.scene.clear()

    def send_report(self):
        self.report_plot()

    @staticmethod
    def reconstruct_domain(original, preprocessed):
        # abuse dict to make "in" comparisons faster
        attrs = OrderedDict()
        for attr in preprocessed.attributes:
            cv = attr._compute_value.variable._compute_value
            var = cv.variable if cv else original[attr.name]
            if var in attrs:    # the reason for OrderedDict
                continue
            attrs[var] = None   # we only need keys
        attrs = list(attrs.keys())
        return Domain(attrs, original.class_var, original.metas)

    @staticmethod
    def get_ruler_values(start, stop, max_width, round_to_nearest=True):
        if max_width == 0:
            return [0]
        diff = np.nan_to_num((stop - start) / max_width)
        if diff <= 0:
            return [0]
        decimals = int(np.floor(np.log10(diff)))
        if diff > 4 * pow(10, decimals):
            step = 5 * pow(10, decimals + 2)
        elif diff > 2 * pow(10, decimals):
            step = 2 * pow(10, decimals + 2)
        elif diff > 1 * pow(10, decimals):
            step = 1 * pow(10, decimals + 2)
        else:
            step = 5 * pow(10, decimals + 1)
        round_by = int(- np.floor(np.log10(step)))
        r = start % step
        if not round_to_nearest:
            _range = np.arange(start + step, stop + r, step) - r
            start, stop = np.floor(start * 100) / 100, np.ceil(stop * 100) / 100
            return np.round(np.hstack((start, _range, stop)), 2)
        return np.round(np.arange(start, stop + r + step, step) - r, round_by)

    @staticmethod
    def get_points_from_coeffs(current_value, coefficients, possible_values):
        if np.isnan(possible_values).any():
            return 0
        indices = np.argsort(possible_values)
        sorted_values = possible_values[indices]
        sorted_coefficients = coefficients[indices]
        for i, val in enumerate(sorted_values):
            if current_value < val:
                break
        diff = sorted_values[i] - sorted_values[i - 1]
        k = 0 if diff < 1e-6 else (sorted_values[i] - current_value) / \
                                  (sorted_values[i] - sorted_values[i - 1])
        return sorted_coefficients[i - 1] * sorted_values[i - 1] * k + \
               sorted_coefficients[i] * sorted_values[i] * (1 - k)
Пример #6
0
class OWNomogram(OWWidget):
    name = "列线图"
    description = "朴素贝叶斯可视化的诺莫图和逻辑回归分类器"
    icon = "icons/Nomogram.svg"
    priority = 2000
    keywords = []

    class Inputs:
        classifier = Input("分类器", Model)
        data = Input("数据", Table)

    MAX_N_ATTRS = 1000
    POINT_SCALE = 0
    ALIGN_LEFT = 0
    ALIGN_ZERO = 1
    ACCEPTABLE = (NaiveBayesModel, LogisticRegressionClassifier)
    settingsHandler = ClassValuesContextHandler()
    target_class_index = ContextSetting(0)
    normalize_probabilities = Setting(False)
    scale = Setting(1)
    display_index = Setting(1)
    n_attributes = Setting(10)
    sort_index = Setting(SortBy.ABSOLUTE)
    cont_feature_dim_index = Setting(0)

    graph_name = "场景"

    class Error(OWWidget.Error):
        invalid_classifier = Msg("诺莫图只接受朴素贝叶斯和逻辑回归分类器")

    def __init__(self):
        super().__init__()
        self.instances = None
        self.domain = None
        self.data = None
        self.classifier = None
        self.align = OWNomogram.ALIGN_ZERO
        self.log_odds_ratios = []
        self.log_reg_coeffs = []
        self.log_reg_coeffs_orig = []
        self.log_reg_cont_data_extremes = []
        self.p = None
        self.b0 = None
        self.points = []
        self.feature_items = {}
        self.feature_marker_values = []
        self.scale_marker_values = lambda x: x
        self.nomogram_main = None
        self.vertical_line = None
        self.hidden_vertical_line = None
        self.old_target_class_index = self.target_class_index
        self.repaint = False

        # GUI
        box = gui.vBox(self.controlArea, "目标类")
        self.class_combo = gui.comboBox(
            box, self, "target_class_index", callback=self._class_combo_changed,
            contentsLength=12)
        self.norm_check = gui.checkBox(
            box, self, "normalize_probabilities", "Normalize probabilities",
            hidden=True, callback=self.update_scene,
            tooltip="For multiclass data 1 vs. all probabilities do not"
                    " sum to 1 and therefore could be normalized.")

        self.scale_radio = gui.radioButtons(
            self.controlArea, self, "scale", ["点量表", "记录比值比"],
            box="规模", callback=self.update_scene)

        box = gui.vBox(self.controlArea, "显示特征")
        grid = QGridLayout()
        radio_group = gui.radioButtonsInBox(
            box, self, "display_index", [], orientation=grid,
            callback=self.update_scene)
        radio_all = gui.appendRadioButton(
            radio_group, "所有", addToLayout=False)
        radio_best = gui.appendRadioButton(
            radio_group, "最佳排名:", addToLayout=False)
        spin_box = gui.hBox(None, margin=0)
        self.n_spin = gui.spin(
            spin_box, self, "n_attributes", 1, self.MAX_N_ATTRS, label=" ",
            controlWidth=60, callback=self._n_spin_changed)
        grid.addWidget(radio_all, 1, 1)
        grid.addWidget(radio_best, 2, 1)
        grid.addWidget(spin_box, 2, 2)

        self.sort_combo = gui.comboBox(
            box, self, "sort_index", label="排名靠前:", items=SortBy.items(),
            orientation=Qt.Horizontal, callback=self.update_scene)

        self.cont_feature_dim_combo = gui.comboBox(
            box, self, "cont_feature_dim_index", label="数字特征: ",
            items=["1维投影", "二维曲线"], orientation=Qt.Horizontal,
            callback=self.update_scene)

        gui.rubber(self.controlArea)

        class _GraphicsView(QGraphicsView):
            def __init__(self, scene, parent, **kwargs):
                for k, v in dict(verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
                                 horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
                                 viewportUpdateMode=QGraphicsView.BoundingRectViewportUpdate,
                                 renderHints=(QPainter.Antialiasing |
                                              QPainter.TextAntialiasing |
                                              QPainter.SmoothPixmapTransform),
                                 alignment=(Qt.AlignTop |
                                            Qt.AlignLeft),
                                 sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                                        QSizePolicy.MinimumExpanding)).items():
                    kwargs.setdefault(k, v)

                super().__init__(scene, parent, **kwargs)

        class GraphicsView(_GraphicsView):
            def __init__(self, scene, parent):
                super().__init__(scene, parent,
                                 verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                                 styleSheet='QGraphicsView {background: white}')
                self.viewport().setMinimumWidth(300)  # XXX: This prevents some tests failing
                self._is_resizing = False

            w = self

            def resizeEvent(self, resizeEvent):
                # Recompute main scene on window width change
                if resizeEvent.size().width() != resizeEvent.oldSize().width():
                    self._is_resizing = True
                    self.w.update_scene()
                    self._is_resizing = False
                return super().resizeEvent(resizeEvent)

            def is_resizing(self):
                return self._is_resizing

            def sizeHint(self):
                return QSize(400, 200)

        class FixedSizeGraphicsView(_GraphicsView):
            def __init__(self, scene, parent):
                super().__init__(scene, parent,
                                 sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                                        QSizePolicy.Minimum))

            def sizeHint(self):
                return QSize(400, 85)

        scene = self.scene = QGraphicsScene(self)

        top_view = self.top_view = FixedSizeGraphicsView(scene, self)
        mid_view = self.view = GraphicsView(scene, self)
        bottom_view = self.bottom_view = FixedSizeGraphicsView(scene, self)

        for view in (top_view, mid_view, bottom_view):
            self.mainArea.layout().addWidget(view)

    def _class_combo_changed(self):
        with np.errstate(invalid='ignore'):
            coeffs = [np.nan_to_num(p[self.target_class_index] /
                                    p[self.old_target_class_index])
                      for p in self.points]
        points = [p[self.old_target_class_index] for p in self.points]
        self.feature_marker_values = [
            self.get_points_from_coeffs(v, c, p) for (v, c, p) in
            zip(self.feature_marker_values, coeffs, points)]
        self.feature_marker_values = np.asarray(self.feature_marker_values)
        self.update_scene()
        self.old_target_class_index = self.target_class_index

    def _n_spin_changed(self):
        self.display_index = 1
        self.update_scene()

    def update_controls(self):
        self.class_combo.clear()
        self.norm_check.setHidden(True)
        self.cont_feature_dim_combo.setEnabled(True)
        if self.domain is not None:
            self.class_combo.addItems(self.domain.class_vars[0].values)
            if len(self.domain.attributes) > self.MAX_N_ATTRS:
                self.display_index = 1
            if len(self.domain.class_vars[0].values) > 2:
                self.norm_check.setHidden(False)
            if not self.domain.has_continuous_attributes():
                self.cont_feature_dim_combo.setEnabled(False)
                self.cont_feature_dim_index = 0
        model = self.sort_combo.model()
        item = model.item(SortBy.POSITIVE)
        item.setFlags(item.flags() | Qt.ItemIsEnabled)
        item = model.item(SortBy.NEGATIVE)
        item.setFlags(item.flags() | Qt.ItemIsEnabled)
        self.align = OWNomogram.ALIGN_ZERO
        if self.classifier and isinstance(self.classifier,
                                          LogisticRegressionClassifier):
            self.align = OWNomogram.ALIGN_LEFT

    @Inputs.data
    def set_data(self, data):
        self.instances = data
        self.feature_marker_values = []
        self.set_feature_marker_values()
        self.update_scene()

    @Inputs.classifier
    def set_classifier(self, classifier):
        self.closeContext()
        self.classifier = classifier
        self.Error.clear()
        if self.classifier and not isinstance(self.classifier, self.ACCEPTABLE):
            self.Error.invalid_classifier()
            self.classifier = None
        self.domain = self.classifier.domain if self.classifier else None
        self.data = None
        self.calculate_log_odds_ratios()
        self.calculate_log_reg_coefficients()
        self.update_controls()
        self.target_class_index = 0
        self.openContext(self.domain.class_var if self.domain is not None
                         else None)
        self.points = self.log_odds_ratios or self.log_reg_coeffs
        self.feature_marker_values = []
        self.old_target_class_index = self.target_class_index
        self.update_scene()

    def calculate_log_odds_ratios(self):
        self.log_odds_ratios = []
        self.p = None
        if self.classifier is None or self.domain is None:
            return
        if not isinstance(self.classifier, NaiveBayesModel):
            return

        log_cont_prob = self.classifier.log_cont_prob
        class_prob = self.classifier.class_prob
        for i in range(len(self.domain.attributes)):
            ca = np.exp(log_cont_prob[i]) * class_prob[:, None]
            _or = (ca / (1 - ca)) / (class_prob / (1 - class_prob))[:, None]
            self.log_odds_ratios.append(np.log(_or))
        self.p = class_prob

    def calculate_log_reg_coefficients(self):
        self.log_reg_coeffs = []
        self.log_reg_cont_data_extremes = []
        self.b0 = None
        if self.classifier is None or self.domain is None:
            return
        if not isinstance(self.classifier, LogisticRegressionClassifier):
            return

        self.domain = self.reconstruct_domain(self.classifier.original_domain,
                                              self.domain)
        self.data = self.classifier.original_data.transform(self.domain)
        attrs, ranges, start = self.domain.attributes, [], 0
        for attr in attrs:
            stop = start + len(attr.values) if attr.is_discrete else start + 1
            ranges.append(slice(start, stop))
            start = stop

        self.b0 = self.classifier.intercept
        coeffs = self.classifier.coefficients
        if len(self.domain.class_var.values) == 2:
            self.b0 = np.hstack((self.b0 * (-1), self.b0))
            coeffs = np.vstack((coeffs * (-1), coeffs))
        self.log_reg_coeffs = [coeffs[:, ranges[i]] for i in range(len(attrs))]
        self.log_reg_coeffs_orig = self.log_reg_coeffs.copy()

        min_values = nanmin(self.data.X, axis=0)
        max_values = nanmax(self.data.X, axis=0)

        for i, min_t, max_t in zip(range(len(self.log_reg_coeffs)),
                                   min_values, max_values):
            if self.log_reg_coeffs[i].shape[1] == 1:
                coef = self.log_reg_coeffs[i]
                self.log_reg_coeffs[i] = np.hstack((coef * min_t, coef * max_t))
                self.log_reg_cont_data_extremes.append(
                    [sorted([min_t, max_t], reverse=(c < 0)) for c in coef])
            else:
                self.log_reg_cont_data_extremes.append([None])

    def update_scene(self):
        self.clear_scene()
        if self.domain is None or not len(self.points[0]):
            return

        n_attrs = self.n_attributes if self.display_index else int(1e10)
        attr_inds, attributes = zip(*self.get_ordered_attributes()[:n_attrs])

        name_items = [QGraphicsTextItem(attr.name) for attr in attributes]
        point_text = QGraphicsTextItem("Points")
        probs_text = QGraphicsTextItem("Probabilities (%)")
        all_items = name_items + [point_text, probs_text]
        name_offset = -max(t.boundingRect().width() for t in all_items) - 10
        w = self.view.viewport().rect().width()
        max_width = w + name_offset - 30

        points = [self.points[i][self.target_class_index]
                  for i in attr_inds]
        if self.align == OWNomogram.ALIGN_LEFT:
            points = [p - p.min() for p in points]
        max_ = np.nan_to_num(max(max(abs(p)) for p in points))
        d = 100 / max_ if max_ else 1
        minimums = [p[self.target_class_index].min() for p in self.points]
        if self.scale == OWNomogram.POINT_SCALE:
            points = [p * d for p in points]

            if self.align == OWNomogram.ALIGN_LEFT:
                self.scale_marker_values = lambda x: (x - minimums) * d
            else:
                self.scale_marker_values = lambda x: x * d
        else:
            if self.align == OWNomogram.ALIGN_LEFT:
                self.scale_marker_values = lambda x: x - minimums
            else:
                self.scale_marker_values = lambda x: x

        point_item, nomogram_head = self.create_main_nomogram(
            attributes, attr_inds,
            name_items, points, max_width, point_text, name_offset)
        probs_item, nomogram_foot = self.create_footer_nomogram(
            probs_text, d, minimums, max_width, name_offset)
        for item in self.feature_items.values():
            item.dot.point_dot = point_item.dot
            item.dot.probs_dot = probs_item.dot
            item.dot.vertical_line = self.hidden_vertical_line

        self.nomogram = nomogram = NomogramItem()
        nomogram.add_items([nomogram_head, self.nomogram_main, nomogram_foot])
        self.scene.addItem(nomogram)

        self.set_feature_marker_values()

        rect = QRectF(self.scene.itemsBoundingRect().x(),
                      self.scene.itemsBoundingRect().y(),
                      self.scene.itemsBoundingRect().width(),
                      self.nomogram.preferredSize().height()).adjusted(10, 0, 20, 0)
        self.scene.setSceneRect(rect)

        # Clip top and bottom (60 and 150) parts from the main view
        self.view.setSceneRect(rect.x(), rect.y() + 80, rect.width() - 10, rect.height() - 160)
        self.view.viewport().setMaximumHeight(rect.height() - 160)
        # Clip main part from top/bottom views
        # below point values are imprecise (less/more than required) but this
        # is not a problem due to clipped scene content still being drawn
        self.top_view.setSceneRect(rect.x(), rect.y() + 3, rect.width() - 10, 20)
        self.bottom_view.setSceneRect(rect.x(), rect.height() - 110, rect.width() - 10, 30)

    def create_main_nomogram(self, attributes, attr_inds, name_items, points,
                             max_width, point_text, name_offset):
        cls_index = self.target_class_index
        min_p = min(p.min() for p in points)
        max_p = max(p.max() for p in points)
        values = self.get_ruler_values(min_p, max_p, max_width)
        min_p, max_p = min(values), max(values)
        diff_ = np.nan_to_num(max_p - min_p)
        scale_x = max_width / diff_ if diff_ else max_width

        nomogram_header = NomogramItem()
        point_item = RulerItem(point_text, values, scale_x, name_offset,
                               - scale_x * min_p)
        point_item.setPreferredSize(point_item.preferredWidth(), 35)
        nomogram_header.add_items([point_item])

        self.nomogram_main = NomogramItem()
        cont_feature_item_class = ContinuousFeature2DItem if \
            self.cont_feature_dim_index else ContinuousFeatureItem

        feature_items = [
            DiscreteFeatureItem(
                name_item, attr.values, point,
                scale_x, name_offset, - scale_x * min_p)
            if attr.is_discrete else
            cont_feature_item_class(
                name_item, self.log_reg_cont_data_extremes[i][cls_index],
                self.get_ruler_values(
                    point.min(), point.max(),
                    scale_x * point.ptp(), False),
                scale_x, name_offset, - scale_x * min_p)
            for i, attr, name_item, point in zip(attr_inds, attributes, name_items, points)]

        self.nomogram_main.add_items(feature_items)
        self.feature_items = OrderedDict(sorted(zip(attr_inds, feature_items)))

        x = - scale_x * min_p
        y = self.nomogram_main.layout().preferredHeight() + 10
        self.vertical_line = QGraphicsLineItem(x, -6, x, y)
        self.vertical_line.setPen(QPen(Qt.DotLine))
        self.vertical_line.setParentItem(point_item)
        self.hidden_vertical_line = QGraphicsLineItem(x, -6, x, y)
        pen = QPen(Qt.DashLine)
        pen.setBrush(QColor(Qt.red))
        self.hidden_vertical_line.setPen(pen)
        self.hidden_vertical_line.setParentItem(point_item)

        return point_item, nomogram_header

    def get_ordered_attributes(self):
        """Return (in_domain_index, attr) pairs, ordered by method in SortBy combo"""
        if self.domain is None or not self.domain.attributes:
            return []

        attrs = self.domain.attributes
        sort_by = self.sort_index
        class_value = self.target_class_index

        if sort_by == SortBy.NO_SORTING:
            return list(enumerate(attrs))

        elif sort_by == SortBy.NAME:

            def key(x):
                _, attr = x
                return attr.name.lower()

        elif sort_by == SortBy.ABSOLUTE:

            def key(x):
                i, attr = x
                if attr.is_discrete:
                    ptp = self.points[i][class_value].ptp()
                else:
                    coef = np.abs(self.log_reg_coeffs_orig[i][class_value]).mean()
                    ptp = coef * np.ptp(self.log_reg_cont_data_extremes[i][class_value])
                return -ptp

        elif sort_by == SortBy.POSITIVE:

            def key(x):
                i, attr = x
                max_value = (self.points[i][class_value].max()
                             if attr.is_discrete else
                             np.mean(self.log_reg_cont_data_extremes[i][class_value]))
                return -max_value

        elif sort_by == SortBy.NEGATIVE:

            def key(x):
                i, attr = x
                min_value = (self.points[i][class_value].min()
                             if attr.is_discrete else
                             np.mean(self.log_reg_cont_data_extremes[i][class_value]))
                return min_value

        return sorted(enumerate(attrs), key=key)


    def create_footer_nomogram(self, probs_text, d, minimums,
                               max_width, name_offset):
        # pylint: disable=invalid-unary-operand-type
        eps, d_ = 0.05, 1
        k = - np.log(self.p / (1 - self.p)) if self.p is not None else - self.b0
        min_sum = k[self.target_class_index] - np.log((1 - eps) / eps)
        max_sum = k[self.target_class_index] - np.log(eps / (1 - eps))
        if self.align == OWNomogram.ALIGN_LEFT:
            max_sum = max_sum - sum(minimums)
            min_sum = min_sum - sum(minimums)
            for i in range(len(k)):  # pylint: disable=consider-using-enumerate
                k[i] = k[i] - sum([min(q) for q in [p[i] for p in self.points]])
        if self.scale == OWNomogram.POINT_SCALE:
            min_sum *= d
            max_sum *= d
            d_ = d

        values = self.get_ruler_values(min_sum, max_sum, max_width)
        min_sum, max_sum = min(values), max(values)
        diff_ = np.nan_to_num(max_sum - min_sum)
        scale_x = max_width / diff_ if diff_ else max_width
        cls_var, cls_index = self.domain.class_var, self.target_class_index
        nomogram_footer = NomogramItem()

        def get_normalized_probabilities(val):
            if not self.normalize_probabilities:
                return 1 / (1 + np.exp(k[cls_index] - val / d_))
            totals = self.__get_totals_for_class_values(minimums)
            p_sum = np.sum(1 / (1 + np.exp(k - totals / d_)))
            return 1 / (1 + np.exp(k[cls_index] - val / d_)) / p_sum

        def get_points(prob):
            if not self.normalize_probabilities:
                return (k[cls_index] - np.log(1 / prob - 1)) * d_
            totals = self.__get_totals_for_class_values(minimums)
            p_sum = np.sum(1 / (1 + np.exp(k - totals / d_)))
            return (k[cls_index] - np.log(1 / (prob * p_sum) - 1)) * d_

        probs_item = ProbabilitiesRulerItem(
            probs_text, values, scale_x, name_offset, - scale_x * min_sum,
            get_points=get_points,
            title="{}='{}'".format(cls_var.name, cls_var.values[cls_index]),
            get_probabilities=get_normalized_probabilities)
        nomogram_footer.add_items([probs_item])
        return probs_item, nomogram_footer

    def __get_totals_for_class_values(self, minimums):
        cls_index = self.target_class_index
        marker_values = self.scale_marker_values(self.feature_marker_values)
        totals = np.full(len(self.domain.class_var.values), np.nan)
        totals[cls_index] = marker_values.sum()
        for i in range(len(self.domain.class_var.values)):
            if i == cls_index:
                continue
            coeffs = [np.nan_to_num(p[i] / p[cls_index]) for p in self.points]
            points = [p[cls_index] for p in self.points]
            total = sum([self.get_points_from_coeffs(v, c, p) for (v, c, p)
                         in zip(self.feature_marker_values, coeffs, points)])
            if self.align == OWNomogram.ALIGN_LEFT:
                points = [p - m for m, p in zip(minimums, points)]
                total -= sum([min(p) for p in [p[i] for p in self.points]])
            d = 100 / max(max(abs(p)) for p in points)
            if self.scale == OWNomogram.POINT_SCALE:
                total *= d
            totals[i] = total
        assert not np.any(np.isnan(totals))
        return totals

    def set_feature_marker_values(self):
        if not (len(self.points) and len(self.feature_items)):
            return
        if not len(self.feature_marker_values):
            self._init_feature_marker_values()
        marker_values = self.scale_marker_values(self.feature_marker_values)

        invisible_sum = 0
        for i, marker in enumerate(marker_values):
            try:
                item = self.feature_items[i]
            except KeyError:
                invisible_sum += marker
            else:
                item.dot.move_to_val(marker)

        item.dot.probs_dot.move_to_sum(invisible_sum)

    def _init_feature_marker_values(self):
        self.feature_marker_values = []
        cls_index = self.target_class_index
        instances = Table(self.domain, self.instances) \
            if self.instances else None
        values = []
        for i, attr in enumerate(self.domain.attributes):
            value, feature_val = 0, None
            if len(self.log_reg_coeffs):
                if attr.is_discrete:
                    ind, n = unique(self.data.X[:, i], return_counts=True)
                    feature_val = np.nan_to_num(ind[np.argmax(n)])
                else:
                    feature_val = nanmean(self.data.X[:, i])

            # If data is provided on a separate signal, use the first data
            # instance to position the points instead of the mean
            inst_in_dom = instances and attr in instances.domain
            if inst_in_dom and not np.isnan(instances[0][attr]):
                feature_val = instances[0][attr]

            if feature_val is not None:
                value = (self.points[i][cls_index][int(feature_val)]
                         if attr.is_discrete else
                         self.log_reg_coeffs_orig[i][cls_index][0] * feature_val)
            values.append(value)
        self.feature_marker_values = np.asarray(values)

    def clear_scene(self):
        self.feature_items = {}
        self.scale_marker_values = lambda x: x
        self.nomogram = None
        self.nomogram_main = None
        self.vertical_line = None
        self.hidden_vertical_line = None
        self.scene.clear()

    def send_report(self):
        self.report_plot()

    @staticmethod
    def reconstruct_domain(original, preprocessed):
        # abuse dict to make "in" comparisons faster
        attrs = OrderedDict()
        for attr in preprocessed.attributes:
            cv = attr._compute_value.variable._compute_value
            var = cv.variable if cv else original[attr.name]
            if var in attrs:    # the reason for OrderedDict
                continue
            attrs[var] = None   # we only need keys
        attrs = list(attrs.keys())
        return Domain(attrs, original.class_var, original.metas)

    @staticmethod
    def get_ruler_values(start, stop, max_width, round_to_nearest=True):
        if max_width == 0:
            return [0]
        diff = np.nan_to_num((stop - start) / max_width)
        if diff <= 0:
            return [0]
        decimals = int(np.floor(np.log10(diff)))
        if diff > 4 * pow(10, decimals):
            step = 5 * pow(10, decimals + 2)
        elif diff > 2 * pow(10, decimals):
            step = 2 * pow(10, decimals + 2)
        elif diff > 1 * pow(10, decimals):
            step = 1 * pow(10, decimals + 2)
        else:
            step = 5 * pow(10, decimals + 1)
        round_by = int(- np.floor(np.log10(step)))
        r = start % step
        if not round_to_nearest:
            _range = np.arange(start + step, stop + r, step) - r
            start, stop = np.floor(start * 100) / 100, np.ceil(stop * 100) / 100
            return np.round(np.hstack((start, _range, stop)), 2)
        return np.round(np.arange(start, stop + r + step, step) - r, round_by)

    @staticmethod
    def get_points_from_coeffs(current_value, coefficients, possible_values):
        if np.isnan(possible_values).any():
            return 0
        # pylint: disable=undefined-loop-variable
        indices = np.argsort(possible_values)
        sorted_values = possible_values[indices]
        sorted_coefficients = coefficients[indices]
        for i, val in enumerate(sorted_values):
            if current_value < val:
                break
        diff = sorted_values[i] - sorted_values[i - 1]
        k = 0 if diff < 1e-6 else (sorted_values[i] - current_value) / \
                                  (sorted_values[i] - sorted_values[i - 1])
        return sorted_coefficients[i - 1] * sorted_values[i - 1] * k + \
               sorted_coefficients[i] * sorted_values[i] * (1 - k)