示例#1
0
class OWDistributions(widget.OWWidget):
    name = "Distributions"
    description = "Display value distributions of a data feature in a graph."
    icon = "icons/Distribution.svg"
    priority = 100
    inputs = [InputSignal("Data", Orange.data.Table, "set_data",
                          doc="Set the input data set")]

    settingsHandler = settings.DomainContextHandler(
        match_values=settings.DomainContextHandler.MATCH_VALUES_ALL)
    #: Selected variable index
    variable_idx = settings.ContextSetting(-1)
    #: Selected group variable
    groupvar_idx = settings.ContextSetting(0)

    relative_freq = settings.Setting(False)
    disc_cont = settings.Setting(False)

    smoothing_index = settings.Setting(5)
    show_prob = settings.ContextSetting(0)

    want_graph = True
    ASH_HIST = 50

    bins = [ 2, 3, 4, 5, 8, 10, 12, 15, 20, 30, 50 ]
    smoothing_facs = list(reversed([ 0.1, 0.2, 0.4, 0.6, 0.8, 1, 1.5, 2, 4, 6, 10 ]))

    def __init__(self):
        super().__init__()
        self.data = None

        self.distributions = None
        self.contingencies = None
        self.var = self.cvar = None
        varbox = gui.widgetBox(self.controlArea, "Variable")

        self.varmodel = itemmodels.VariableListModel()
        self.groupvarmodel = []

        self.varview = QtGui.QListView(
            selectionMode=QtGui.QListView.SingleSelection)
        self.varview.setSizePolicy(
            QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Expanding)
        self.varview.setModel(self.varmodel)
        self.varview.setSelectionModel(
            itemmodels.ListSingleSelectionModel(self.varmodel))
        self.varview.selectionModel().selectionChanged.connect(
            self._on_variable_idx_changed)
        varbox.layout().addWidget(self.varview)

        box = gui.widgetBox(self.controlArea, "Precision")

        gui.separator(self.controlArea, 4, 4)

        box2 = gui.widgetBox(box, orientation="horizontal")
        self.l_smoothing_l = gui.widgetLabel(box2, "Smooth")
        gui.hSlider(box2, self, "smoothing_index",
                    minValue=0, maxValue=len(self.smoothing_facs) - 1,
                    callback=self._on_set_smoothing, createLabel=False)
        self.l_smoothing_r = gui.widgetLabel(box2, "Precise")

        self.cb_disc_cont = gui.checkBox(
            gui.indentedBox(box, sep=4),
            self, "disc_cont", "Bin continuous variables",
            callback=self._on_groupvar_idx_changed)

        box = gui.widgetBox(self.controlArea, "Group by")
        self.icons = gui.attributeIconDict
        self.groupvarview = gui.comboBox(box, self, "groupvar_idx",
             callback=self._on_groupvar_idx_changed, valueType=str,
             contentsLength=12)
        box2 = gui.indentedBox(box, sep=4)
        self.cb_rel_freq = gui.checkBox(
            box2, self, "relative_freq", "Show relative frequencies",
            callback=self._on_relative_freq_changed)
        gui.separator(box2)
        self.cb_prob = gui.comboBox(
            box2, self, "show_prob", label="Show probabilities",
            orientation="horizontal",
            callback=self._on_relative_freq_changed)

        plotview = pg.PlotWidget(background=None)
        self.mainArea.layout().addWidget(plotview)
        w = QtGui.QLabel()
        w.setSizePolicy(QtGui.QSizePolicy.Expanding, QtGui.QSizePolicy.Fixed)
        self.mainArea.layout().addWidget(w, Qt.AlignCenter)

        self.plot = pg.PlotItem()
        self.plot.hideButtons()
        plotview.setCentralItem(self.plot)

        self.plot_prob = pg.ViewBox()
        self.plot.hideAxis('right')
        self.plot.scene().addItem(self.plot_prob)
        self.plot.getAxis("right").linkToView(self.plot_prob)
        self.plot.getAxis("right").setLabel("Probability")
        self.plot_prob.setZValue(10)
        self.plot_prob.setXLink(self.plot)
        self.update_views()
        self.plot.vb.sigResized.connect(self.update_views)
        self.plot_prob.setRange(yRange=[0,1])

        def disable_mouse(plot):
            plot.setMouseEnabled(False, False)
            plot.setMenuEnabled(False)

        disable_mouse(self.plot.getViewBox())
        disable_mouse(self.plot_prob)

        pen = QtGui.QPen(self.palette().color(QtGui.QPalette.Text))
        for axis in ("left", "bottom"):
            self.plot.getAxis(axis).setPen(pen)

        self._legend = LegendItem()
        self._legend.setParentItem(self.plot.getViewBox())
        self._legend.hide()
        self._legend.anchor((1, 0), (1, 0))
        self.graphButton.clicked.connect(self.save_graph)

    def update_views(self):
        self.plot_prob.setGeometry(self.plot.vb.sceneBoundingRect())
        self.plot_prob.linkedViewChanged(self.plot.vb, self.plot_prob.XAxis)

    def set_data(self, data):
        self.closeContext()
        self.clear()
        self.data = data
        if self.data is not None:
            domain = self.data.domain
            self.varmodel[:] = list(domain)

            self.groupvarview.clear()
            self.groupvarmodel = \
                ["(None)"] + [var for var in domain if var.is_discrete]
            self.groupvarview.addItem("(None)")
            for var in self.groupvarmodel[1:]:
                self.groupvarview.addItem(self.icons[var], var.name)
            if domain.has_discrete_class:
                self.groupvar_idx = \
                    self.groupvarmodel.index(domain.class_var)
            self.openContext(domain)
            self.variable_idx = min(max(self.variable_idx, 0),
                                    len(self.varmodel) - 1)
            self.groupvar_idx = min(max(self.groupvar_idx, 0),
                                    len(self.groupvarmodel) - 1)
            itemmodels.select_row(self.varview, self.variable_idx)
            self._setup()

    def clear(self):
        self.plot.clear()
        self.plot_prob.clear()
        self.varmodel[:] = []
        self.groupvarmodel = []
        self.variable_idx = -1
        self.groupvar_idx = 0
        self._legend.clear()
        self._legend.hide()

    def _setup_smoothing(self):
        if not self.disc_cont and self.var and self.var.is_continuous:
            self.cb_disc_cont.setText("Bin continuous variables")
            self.l_smoothing_l.setText("Smooth")
            self.l_smoothing_r.setText("Precise")
        else:
            self.cb_disc_cont.setText("Bin continuous variables into {} bins".
                                      format(self.bins[self.smoothing_index]))
            self.l_smoothing_l.setText(" " + str(self.bins[0]))
            self.l_smoothing_r.setText(" " + str(self.bins[-1]))

    def _setup(self):
        self.plot.clear()
        self.plot_prob.clear()
        self._legend.clear()
        self._legend.hide()

        varidx = self.variable_idx
        self.var = self.cvar = None
        if varidx >= 0:
            self.var = self.varmodel[varidx]
        if self.groupvar_idx > 0:
            self.cvar = self.groupvarmodel[self.groupvar_idx]
            self.cb_prob.clear()
            self.cb_prob.addItem("(None)")
            self.cb_prob.addItems(self.cvar.values)
            self.cb_prob.addItem("(All)")
            self.show_prob = min(max(self.show_prob, 0),
                    len(self.cvar.values) + 1)
        data = self.data
        self._setup_smoothing()
        if self.var is None:
            return
        if self.disc_cont:
            data = self.data[:, (self.var, self.cvar) if self.cvar else self.var ]
            disc = Orange.preprocess.discretize.EqualWidth(n=self.bins[self.smoothing_index])
            data = Orange.preprocess.Discretize(data, method=disc)
            self.var = data.domain.variables[0]
        self.set_left_axis_name()
        self.enable_disable_rel_freq()
        if self.cvar:
            self.contingencies = \
                contingency.get_contingency(data, self.var, self.cvar)
            self.display_contingency()
        else:
            self.distributions = \
                distribution.get_distribution(data, self.var)
            self.display_distribution()
        self.plot.autoRange()

    def display_distribution(self):
        dist = self.distributions
        var = self.var
        assert len(dist) > 0
        self.plot.clear()
        self.plot_prob.clear()
        self.plot.hideAxis('right')

        bottomaxis = self.plot.getAxis("bottom")
        bottomaxis.setLabel(var.name)

        self.set_left_axis_name()
        if var and var.is_continuous:
            bottomaxis.setTicks(None)
            edges, curve = ash_curve(dist, None, m=OWDistributions.ASH_HIST,
                smoothing_factor=self.smoothing_facs[self.smoothing_index])
            edges = edges + (edges[1] - edges[0])/2
            edges = edges[:-1]
            item = pg.PlotCurveItem()
            pen = QtGui.QPen(QtGui.QBrush(Qt.white), 3)
            pen.setCosmetic(True)
            item.setData(edges, curve, antialias=True, stepMode=False,
                         fillLevel=0, brush=QtGui.QBrush(Qt.gray),
                         pen=pen)
            self.plot.addItem(item)
        else:
            bottomaxis.setTicks([list(enumerate(var.values))])
            for i, w in enumerate(dist):
                geom = QtCore.QRectF(i - 0.33, 0, 0.66, w)
                item = DistributionBarItem(geom, [1.0],
                                           [QtGui.QColor(128, 128, 128)])
                self.plot.addItem(item)

    def _on_relative_freq_changed(self):
        self.set_left_axis_name()
        if self.cvar and self.cvar.is_discrete:
            self.display_contingency()
        else:
            self.display_distribution()
        self.plot.autoRange()

    def display_contingency(self):
        """
        Set the contingency to display.
        """
        cont = self.contingencies
        var, cvar = self.var, self.cvar
        assert len(cont) > 0
        self.plot.clear()
        self.plot_prob.clear()
        self._legend.clear()

        if self.show_prob:
            self.plot.showAxis('right')
        else:
            self.plot.hideAxis('right')

        bottomaxis = self.plot.getAxis("bottom")
        bottomaxis.setLabel(var.name)

        cvar_values = cvar.values
        palette = colorpalette.ColorPaletteGenerator(len(cvar_values))
        colors = [palette[i].lighter() for i in range(len(cvar_values))]

        if var and var.is_continuous:
            bottomaxis.setTicks(None)

            weights, cols, cvar_values, curves = [], [], [], []
            for i, dist in enumerate(cont):
                v, W = dist
                if len(v):
                    weights.append(numpy.sum(W))
                    cols.append(colors[i])
                    cvar_values.append(cvar.values[i])
                    curves.append(ash_curve(dist, cont,  m=OWDistributions.ASH_HIST,
                        smoothing_factor=self.smoothing_facs[self.smoothing_index]))
            weights = numpy.array(weights)
            sumw = numpy.sum(weights)
            weights /= sumw
            colors = cols
            curves = [(X, Y * w) for (X, Y), w in zip(curves, weights)]
            ncval = len(cvar_values)

            curvesline = [] #from histograms to lines
            for (X,Y) in curves:
                X = X + (X[1] - X[0])/2
                X = X[:-1]
                X = numpy.array(X)
                Y = numpy.array(Y)
                curvesline.append((X,Y))

            for t in [ "fill", "line" ]:
                for (X, Y), color, w in reversed(list(zip(curvesline, colors, weights))):
                    item = pg.PlotCurveItem()
                    pen = QtGui.QPen(QtGui.QBrush(color), 3)
                    pen.setCosmetic(True)
                    color = QtGui.QColor(color)
                    color.setAlphaF(0.2)
                    item.setData(X, Y/(w if self.relative_freq else 1), antialias=True, stepMode=False,
                         fillLevel=0 if t == "fill" else None,
                         brush=QtGui.QBrush(color), pen=pen)
                    self.plot.addItem(item)

            if self.show_prob:
                M_EST = 5 #for M estimate
                all_X = numpy.array(numpy.unique(numpy.hstack([X for X,_ in curvesline])))
                inter_X = numpy.array(numpy.linspace(all_X[0], all_X[-1], len(all_X)*2))
                curvesinterp = [ numpy.interp(inter_X, X, Y) for (X,Y) in curvesline ]
                sumprob = numpy.sum(curvesinterp, axis=0)
                # allcorrection = M_EST/sumw*numpy.sum(sumprob)/len(inter_X)
                legal = sumprob > 0.05 * numpy.max(sumprob)

                i = len(curvesinterp) + 1
                show_all = self.show_prob == i
                for Y, color in reversed(list(zip(curvesinterp, colors))):
                    i -= 1
                    if show_all or self.show_prob == i:
                        item = pg.PlotCurveItem()
                        pen = QtGui.QPen(QtGui.QBrush(color), 3, style=QtCore.Qt.DotLine)
                        pen.setCosmetic(True)
                        #prob = (Y+allcorrection/ncval)/(sumprob+allcorrection)
                        prob = Y[legal] / sumprob[legal]
                        item.setData(inter_X[legal], prob, antialias=True, stepMode=False,
                             fillLevel=None, brush=None, pen=pen)
                        self.plot_prob.addItem(item)

        elif var and var.is_discrete:
            bottomaxis.setTicks([list(enumerate(var.values))])

            cont = numpy.array(cont)
            ncval = len(cvar_values)

            maxh = 0 #maximal column height
            maxrh = 0 #maximal relative column height
            scvar = cont.sum(axis=1)
            #a cvar with sum=0 with allways have distribution counts 0,
            #therefore we can divide it by anything
            scvar[scvar==0] = 1
            for i, (value, dist) in enumerate(zip(var.values, cont.T)):
                maxh = max(maxh, max(dist))
                maxrh = max(maxrh, max(dist/scvar))

            for i, (value, dist) in enumerate(zip(var.values, cont.T)):
                dsum = sum(dist)
                geom = QtCore.QRectF(i - 0.333, 0, 0.666, maxrh
                                     if self.relative_freq else maxh)
                if self.show_prob:
                    prob = dist / dsum
                    ci = 1.96 * numpy.sqrt(prob * (1 - prob) / dsum)
                else:
                    ci = None
                item = DistributionBarItem(geom, dist/scvar/maxrh
                                           if self.relative_freq
                                           else dist/maxh, colors)
                self.plot.addItem(item)

                if self.show_prob:
                    for ic, a in enumerate(dist):
                        if self.show_prob - 1 != ic and \
                                self.show_prob - 1 != len(dist):
                            continue
                        position = -0.333 + ((ic+0.5)*0.666/len(dist))
                        if dsum < 1e-6:
                            continue
                        prob = a / dsum
                        if not 1e-6 < prob < 1 - 1e-6:
                            continue
                        ci = 1.96 * sqrt(prob * (1 - prob) / dsum)
                        mark = pg.ScatterPlotItem()
                        bar = pg.ErrorBarItem()
                        pen = QtGui.QPen(QtGui.QBrush(QtGui.QColor(0)), 1)
                        pen.setCosmetic(True)
                        bar.setData(x=[i+position], y=[prob],
                                    bottom=min(numpy.array([ci]), prob),
                                    top=min(numpy.array([ci]), 1 - prob),
                                     beam=numpy.array([0.05]),
                                     brush=QtGui.QColor(1), pen=pen)
                        mark.setData([i+position], [prob], antialias=True, symbol="o",
                                 fillLevel=None, pxMode=True, size=10,
                                 brush=QtGui.QColor(colors[ic]), pen=pen)
                        self.plot_prob.addItem(bar)
                        self.plot_prob.addItem(mark)

        for color, name in zip(colors, cvar_values):
            self._legend.addItem(
                ScatterPlotItem(pen=color, brush=color, size=10, shape="s"),
                escape(name)
            )
        self._legend.show()

    def set_left_axis_name(self):
        set_label = self.plot.getAxis("left").setLabel
        if self.var and self.var.is_continuous:
            set_label(["Density", "Relative density"]
                      [self.cvar is not None and self.relative_freq])
        else:
            set_label(["Frequency", "Relative frequency"]
                      [self.cvar is not None and self.relative_freq])

    def enable_disable_rel_freq(self):
        self.cb_prob.setDisabled(self.var is None or self.cvar is None)
        self.cb_rel_freq.setDisabled(
            self.var is None or self.cvar is None)

    def _on_variable_idx_changed(self):
        self.variable_idx = selected_index(self.varview)
        self._setup()

    def _on_groupvar_idx_changed(self):
        self._setup()

    def _on_set_smoothing(self):
        self._setup()

    def onDeleteWidget(self):
        self.plot.clear()
        super().onDeleteWidget()

    def save_graph(self):
        from Orange.widgets.data.owsave import OWSave

        save_img = OWSave(data=self.plot,
                          file_formats=FileFormat.img_writers)
        save_img.exec_()
示例#2
0
class OWDistributions(widget.OWWidget):
    name = "Distributions"
    description = "Display value distributions of a data feature in a graph."
    icon = "icons/Distribution.svg"
    priority = 120

    class Inputs:
        data = Input("Data", Orange.data.Table, doc="Set the input data set")

    settingsHandler = settings.DomainContextHandler(
        match_values=settings.DomainContextHandler.MATCH_VALUES_ALL)
    #: Selected variable index
    variable_idx = settings.ContextSetting(-1)
    #: Selected group variable
    groupvar_idx = settings.ContextSetting(0)

    relative_freq = settings.Setting(False)
    disc_cont = settings.Setting(False)

    smoothing_index = settings.Setting(5)
    show_prob = settings.ContextSetting(0)

    graph_name = "plot"

    ASH_HIST = 50

    bins = [2, 3, 4, 5, 8, 10, 12, 15, 20, 30, 50]
    smoothing_facs = list(reversed([0.1, 0.2, 0.4, 0.6, 0.8, 1, 1.5, 2, 4, 6, 10]))

    def __init__(self):
        super().__init__()
        self.data = None

        self.distributions = None
        self.contingencies = None
        self.var = self.cvar = None
        varbox = gui.vBox(self.controlArea, "Variable")

        self.varmodel = itemmodels.VariableListModel()
        self.groupvarmodel = []

        self.varview = QListView(
            selectionMode=QListView.SingleSelection)
        self.varview.setSizePolicy(
            QSizePolicy.Minimum, QSizePolicy.Expanding)
        self.varview.setModel(self.varmodel)
        self.varview.setSelectionModel(
            itemmodels.ListSingleSelectionModel(self.varmodel))
        self.varview.selectionModel().selectionChanged.connect(
            self._on_variable_idx_changed)
        varbox.layout().addWidget(self.varview)

        box = gui.vBox(self.controlArea, "Precision")

        gui.separator(self.controlArea, 4, 4)

        box2 = gui.hBox(box)
        self.l_smoothing_l = gui.widgetLabel(box2, "Smooth")
        gui.hSlider(box2, self, "smoothing_index",
                    minValue=0, maxValue=len(self.smoothing_facs) - 1,
                    callback=self._on_set_smoothing, createLabel=False)
        self.l_smoothing_r = gui.widgetLabel(box2, "Precise")

        self.cb_disc_cont = gui.checkBox(
            gui.indentedBox(box, sep=4),
            self, "disc_cont", "Bin numeric variables",
            callback=self._on_groupvar_idx_changed,
            tooltip="Show numeric variables as categorical.")

        box = gui.vBox(self.controlArea, "Group by")
        self.icons = gui.attributeIconDict
        self.groupvarview = gui.comboBox(
            box, self, "groupvar_idx",
            callback=self._on_groupvar_idx_changed,
            valueType=str, contentsLength=12)
        box2 = gui.indentedBox(box, sep=4)
        self.cb_rel_freq = gui.checkBox(
            box2, self, "relative_freq", "Show relative frequencies",
            callback=self._on_relative_freq_changed,
            tooltip="Normalize probabilities so that probabilities "
                    "for each group-by value sum to 1.")
        gui.separator(box2)
        self.cb_prob = gui.comboBox(
            box2, self, "show_prob", label="Show probabilities:",
            orientation=Qt.Horizontal,
            callback=self._on_relative_freq_changed,
            tooltip="Show probabilities for a chosen group-by value "
                    "(at each point probabilities for all group-by values sum to 1).")

        self.plotview = pg.PlotWidget(background=None)
        self.plotview.setRenderHint(QPainter.Antialiasing)
        self.mainArea.layout().addWidget(self.plotview)
        w = QLabel()
        w.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
        self.mainArea.layout().addWidget(w, Qt.AlignCenter)
        self.ploti = pg.PlotItem()
        self.plot = self.ploti.vb
        self.ploti.hideButtons()
        self.plotview.setCentralItem(self.ploti)

        self.plot_prob = pg.ViewBox()
        self.ploti.hideAxis('right')
        self.ploti.scene().addItem(self.plot_prob)
        self.ploti.getAxis("right").linkToView(self.plot_prob)
        self.ploti.getAxis("right").setLabel("Probability")
        self.plot_prob.setZValue(10)
        self.plot_prob.setXLink(self.ploti)
        self.update_views()
        self.ploti.vb.sigResized.connect(self.update_views)
        self.plot_prob.setRange(yRange=[0, 1])

        def disable_mouse(plot):
            plot.setMouseEnabled(False, False)
            plot.setMenuEnabled(False)

        disable_mouse(self.plot)
        disable_mouse(self.plot_prob)

        self.tooltip_items = []
        self.plot.scene().installEventFilter(
            HelpEventDelegate(self.help_event, self))

        pen = QPen(self.palette().color(QPalette.Text))
        for axis in ("left", "bottom"):
            self.ploti.getAxis(axis).setPen(pen)

        self._legend = LegendItem()
        self._legend.setParentItem(self.plot)
        self._legend.hide()
        self._legend.anchor((1, 0), (1, 0))

    def update_views(self):
        self.plot_prob.setGeometry(self.plot.sceneBoundingRect())
        self.plot_prob.linkedViewChanged(self.plot, self.plot_prob.XAxis)

    @Inputs.data
    def set_data(self, data):
        self.closeContext()
        self.clear()
        self.warning()
        self.data = data
        self.distributions = None
        self.contingencies = None
        if self.data is not None:
            if not self.data:
                self.warning("Empty input data cannot be visualized")
                return
            domain = self.data.domain
            self.varmodel[:] = list(domain.variables) + \
                               [meta for meta in domain.metas
                                if meta.is_continuous or meta.is_discrete]
            self.groupvarview.clear()
            self.groupvarmodel = \
                ["(None)"] + [var for var in domain.variables if var.is_discrete] + \
                [meta for meta in domain.metas if meta.is_discrete]
            self.groupvarview.addItem("(None)")
            for var in self.groupvarmodel[1:]:
                self.groupvarview.addItem(self.icons[var], var.name)
            if domain.has_discrete_class:
                self.groupvar_idx = \
                    self.groupvarmodel[1:].index(domain.class_var) + 1
            self.openContext(domain)
            self.variable_idx = min(max(self.variable_idx, 0),
                                    len(self.varmodel) - 1)
            self.groupvar_idx = min(max(self.groupvar_idx, 0),
                                    len(self.groupvarmodel) - 1)
            itemmodels.select_row(self.varview, self.variable_idx)
            self._setup()

    def clear(self):
        self.plot.clear()
        self.plot_prob.clear()
        self.varmodel[:] = []
        self.groupvarmodel = []
        self.variable_idx = -1
        self.groupvar_idx = 0
        self._legend.clear()
        self._legend.hide()
        self.groupvarview.clear()
        self.cb_prob.clear()

    def _setup_smoothing(self):
        if not self.disc_cont and self.var and self.var.is_continuous:
            self.cb_disc_cont.setText("Bin numeric variables")
            self.l_smoothing_l.setText("Smooth")
            self.l_smoothing_r.setText("Precise")
        else:
            self.cb_disc_cont.setText("Bin numeric variables into {} bins".
                                      format(self.bins[self.smoothing_index]))
            self.l_smoothing_l.setText(" " + str(self.bins[0]))
            self.l_smoothing_r.setText(" " + str(self.bins[-1]))

    @property
    def smoothing_factor(self):
        return self.smoothing_facs[self.smoothing_index]

    def _setup(self):
        self.plot.clear()
        self.plot_prob.clear()
        self._legend.clear()
        self._legend.hide()

        varidx = self.variable_idx
        self.var = self.cvar = None
        if varidx >= 0:
            self.var = self.varmodel[varidx]
        if self.groupvar_idx > 0:
            self.cvar = self.groupvarmodel[self.groupvar_idx]
            self.cb_prob.clear()
            self.cb_prob.addItem("(None)")
            self.cb_prob.addItems(self.cvar.values)
            self.cb_prob.addItem("(All)")
            self.show_prob = min(max(self.show_prob, 0),
                                 len(self.cvar.values) + 1)
        data = self.data
        self._setup_smoothing()
        if self.var is None:
            return
        if self.disc_cont:
            domain = Orange.data.Domain(
                [self.var, self.cvar] if self.cvar else [self.var])
            data = Orange.data.Table(domain, data)
            disc = EqualWidth(n=self.bins[self.smoothing_index])
            data = Discretize(method=disc, remove_const=False)(data)
            self.var = data.domain[0]
        self.set_left_axis_name()
        self.enable_disable_rel_freq()
        if self.cvar:
            self.contingencies = \
                contingency.get_contingency(data, self.var, self.cvar)
            self.display_contingency()
        else:
            self.distributions = \
                distribution.get_distribution(data, self.var)
            self.display_distribution()
        self.plot.autoRange()

    def help_event(self, ev):
        self.plot.mapSceneToView(ev.scenePos())
        ctooltip = []
        for vb, item in self.tooltip_items:
            mouse_over_curve = isinstance(item, pg.PlotCurveItem) \
                and item.mouseShape().contains(vb.mapSceneToView(ev.scenePos()))
            mouse_over_bar = isinstance(item, DistributionBarItem) \
                and item.boundingRect().contains(vb.mapSceneToView(ev.scenePos()))
            if mouse_over_curve or mouse_over_bar:
                ctooltip.append(item.tooltip)
        if ctooltip:
            QToolTip.showText(ev.screenPos(), "\n\n".join(ctooltip), widget=self.plotview)
            return True
        return False

    def display_distribution(self):
        dist = self.distributions
        var = self.var
        if dist is None or not len(dist):
            return
        self.plot.clear()
        self.plot_prob.clear()
        self.ploti.hideAxis('right')
        self.tooltip_items = []

        bottomaxis = self.ploti.getAxis("bottom")
        bottomaxis.setLabel(var.name)
        bottomaxis.resizeEvent()

        self.set_left_axis_name()
        if var and var.is_continuous:
            bottomaxis.setTicks(None)
            if not len(dist[0]):
                return
            edges, curve = ash_curve(dist, None, m=OWDistributions.ASH_HIST,
                                     smoothing_factor=self.smoothing_factor)
            edges = edges + (edges[1] - edges[0])/2
            edges = edges[:-1]
            item = pg.PlotCurveItem()
            pen = QPen(QBrush(Qt.white), 3)
            pen.setCosmetic(True)
            item.setData(edges, curve, antialias=True, stepMode=False,
                         fillLevel=0, brush=QBrush(Qt.gray), pen=pen)
            self.plot.addItem(item)
            item.tooltip = "Density"
            self.tooltip_items.append((self.plot, item))
        else:
            bottomaxis.setTicks([list(enumerate(var.values))])
            for i, w in enumerate(dist):
                geom = QRectF(i - 0.33, 0, 0.66, w)
                item = DistributionBarItem(geom, [1.0],
                                           [QColor(128, 128, 128)])
                self.plot.addItem(item)
                item.tooltip = "Frequency for %s: %r" % (var.values[i], w)
                self.tooltip_items.append((self.plot, item))

    def _on_relative_freq_changed(self):
        self.set_left_axis_name()
        if self.cvar and self.cvar.is_discrete:
            self.display_contingency()
        else:
            self.display_distribution()
        self.plot.autoRange()

    def display_contingency(self):
        """
        Set the contingency to display.
        """
        cont = self.contingencies
        var, cvar = self.var, self.cvar
        if cont is None or not len(cont):
            return
        self.plot.clear()
        self.plot_prob.clear()
        self._legend.clear()
        self.tooltip_items = []

        if self.show_prob:
            self.ploti.showAxis('right')
        else:
            self.ploti.hideAxis('right')

        bottomaxis = self.ploti.getAxis("bottom")
        bottomaxis.setLabel(var.name)
        bottomaxis.resizeEvent()

        cvar_values = cvar.values
        colors = [QColor(*col) for col in cvar.colors]

        if var and var.is_continuous:
            bottomaxis.setTicks(None)

            weights, cols, cvar_values, curves = [], [], [], []
            for i, dist in enumerate(cont):
                v, W = dist
                if len(v):
                    weights.append(numpy.sum(W))
                    cols.append(colors[i])
                    cvar_values.append(cvar.values[i])
                    curves.append(ash_curve(
                        dist, cont, m=OWDistributions.ASH_HIST,
                        smoothing_factor=self.smoothing_factor))
            weights = numpy.array(weights)
            sumw = numpy.sum(weights)
            weights /= sumw
            colors = cols
            curves = [(X, Y * w) for (X, Y), w in zip(curves, weights)]

            curvesline = [] #from histograms to lines
            for X, Y in curves:
                X = X + (X[1] - X[0])/2
                X = X[:-1]
                X = numpy.array(X)
                Y = numpy.array(Y)
                curvesline.append((X, Y))

            for t in ["fill", "line"]:
                curve_data = list(zip(curvesline, colors, weights, cvar_values))
                for (X, Y), color, w, cval in reversed(curve_data):
                    item = pg.PlotCurveItem()
                    pen = QPen(QBrush(color), 3)
                    pen.setCosmetic(True)
                    color = QColor(color)
                    color.setAlphaF(0.2)
                    item.setData(X, Y/(w if self.relative_freq else 1),
                                 antialias=True, stepMode=False,
                                 fillLevel=0 if t == "fill" else None,
                                 brush=QBrush(color), pen=pen)
                    self.plot.addItem(item)
                    if t == "line":
                        item.tooltip = "{}\n{}={}".format(
                            "Normalized density " if self.relative_freq else "Density ",
                            cvar.name, cval)
                        self.tooltip_items.append((self.plot, item))

            if self.show_prob:
                all_X = numpy.array(numpy.unique(numpy.hstack([X for X, _ in curvesline])))
                inter_X = numpy.array(numpy.linspace(all_X[0], all_X[-1], len(all_X)*2))
                curvesinterp = [numpy.interp(inter_X, X, Y) for (X, Y) in curvesline]
                sumprob = numpy.sum(curvesinterp, axis=0)
                legal = sumprob > 0.05 * numpy.max(sumprob)

                i = len(curvesinterp) + 1
                show_all = self.show_prob == i
                for Y, color, cval in reversed(list(zip(curvesinterp, colors, cvar_values))):
                    i -= 1
                    if show_all or self.show_prob == i:
                        item = pg.PlotCurveItem()
                        pen = QPen(QBrush(color), 3, style=Qt.DotLine)
                        pen.setCosmetic(True)
                        prob = Y[legal] / sumprob[legal]
                        item.setData(
                            inter_X[legal], prob, antialias=True, stepMode=False,
                            fillLevel=None, brush=None, pen=pen)
                        self.plot_prob.addItem(item)
                        item.tooltip = "Probability that \n" + cvar.name + "=" + cval
                        self.tooltip_items.append((self.plot_prob, item))

        elif var and var.is_discrete:
            bottomaxis.setTicks([list(enumerate(var.values))])

            cont = numpy.array(cont)

            maxh = 0 #maximal column height
            maxrh = 0 #maximal relative column height
            scvar = cont.sum(axis=1)
            #a cvar with sum=0 with allways have distribution counts 0,
            #therefore we can divide it by anything
            scvar[scvar == 0] = 1
            for i, (value, dist) in enumerate(zip(var.values, cont.T)):
                maxh = max(maxh, max(dist))
                maxrh = max(maxrh, max(dist/scvar))

            for i, (value, dist) in enumerate(zip(var.values, cont.T)):
                dsum = sum(dist)
                geom = QRectF(i - 0.333, 0, 0.666,
                              maxrh if self.relative_freq else maxh)
                if self.show_prob:
                    prob = dist / dsum
                    ci = 1.96 * numpy.sqrt(prob * (1 - prob) / dsum)
                else:
                    ci = None
                item = DistributionBarItem(geom, dist/scvar/maxrh
                                           if self.relative_freq
                                           else dist/maxh, colors)
                self.plot.addItem(item)
                tooltip = "\n".join(
                    "%s: %.*f" % (n, 3 if self.relative_freq else 1, v)
                    for n, v in zip(cvar_values, dist/scvar if self.relative_freq else dist))
                item.tooltip = "{} ({}={}):\n{}".format(
                    "Normalized frequency " if self.relative_freq else "Frequency ",
                    cvar.name, value, tooltip)
                self.tooltip_items.append((self.plot, item))

                if self.show_prob:
                    item.tooltip += "\n\nProbabilities:"
                    for ic, a in enumerate(dist):
                        if self.show_prob - 1 != ic and \
                                self.show_prob - 1 != len(dist):
                            continue
                        position = -0.333 + ((ic+0.5)*0.666/len(dist))
                        if dsum < 1e-6:
                            continue
                        prob = a / dsum
                        if not 1e-6 < prob < 1 - 1e-6:
                            continue
                        ci = 1.96 * sqrt(prob * (1 - prob) / dsum)
                        item.tooltip += "\n%s: %.3f ± %.3f" % (cvar_values[ic], prob, ci)
                        mark = pg.ScatterPlotItem()
                        errorbar = pg.ErrorBarItem()
                        pen = QPen(QBrush(QColor(0)), 1)
                        pen.setCosmetic(True)
                        errorbar.setData(x=[i+position], y=[prob],
                                         bottom=min(numpy.array([ci]), prob),
                                         top=min(numpy.array([ci]), 1 - prob),
                                         beam=numpy.array([0.05]),
                                         brush=QColor(1), pen=pen)
                        mark.setData([i+position], [prob], antialias=True, symbol="o",
                                     fillLevel=None, pxMode=True, size=10,
                                     brush=QColor(colors[ic]), pen=pen)
                        self.plot_prob.addItem(errorbar)
                        self.plot_prob.addItem(mark)

        for color, name in zip(colors, cvar_values):
            self._legend.addItem(
                ScatterPlotItem(pen=color, brush=color, size=10, shape="s"),
                escape(name)
            )
        self._legend.show()

    def set_left_axis_name(self):
        leftaxis = self.ploti.getAxis("left")
        set_label = leftaxis.setLabel
        if self.var and self.var.is_continuous:
            set_label(["Density", "Relative density"]
                      [self.cvar is not None and self.relative_freq])
        else:
            set_label(["Frequency", "Relative frequency"]
                      [self.cvar is not None and self.relative_freq])
        leftaxis.resizeEvent()

    def enable_disable_rel_freq(self):
        self.cb_prob.setDisabled(self.var is None or self.cvar is None)
        self.cb_rel_freq.setDisabled(
            self.var is None or self.cvar is None)

    def _on_variable_idx_changed(self):
        self.variable_idx = selected_index(self.varview)
        self._setup()

    def _on_groupvar_idx_changed(self):
        self._setup()

    def _on_set_smoothing(self):
        self._setup()

    def onDeleteWidget(self):
        self.plot.clear()
        super().onDeleteWidget()

    def get_widget_name_extension(self):
        if self.variable_idx >= 0:
            return self.varmodel[self.variable_idx]

    def send_report(self):
        self.plotview.scene().setSceneRect(self.plotview.sceneRect())
        if self.variable_idx < 0:
            return
        self.report_plot()
        text = "Distribution of '{}'".format(
            self.varmodel[self.variable_idx])
        if self.groupvar_idx:
            group_var = self.groupvarmodel[self.groupvar_idx]
            prob = self.cb_prob
            indiv_probs = 0 < prob.currentIndex() < prob.count() - 1
            if not indiv_probs or self.relative_freq:
                text += " grouped by '{}'".format(group_var)
                if self.relative_freq:
                    text += " (relative frequencies)"
            if indiv_probs:
                text += "; probabilites for '{}={}'".format(
                    group_var, prob.currentText())
        self.report_caption(text)
class DistributionTest(OWWidget):
    name = 'Distribution Test'
    description = 'Check if data is in given distribution.'
    icon = 'icons/disttest.svg'
    want_main_area = True
    buttons_area_orientation = Qt.Vertical
    resizing_enabled = True
    inputs = [('Data', Orange.data.Table, 'set_data')]

    available_tests = (
        KolmogorovSmirnov,
        AndersonDarling,
        ShapiroWilk,
        ChiSquare,
    )
    settingsHandler = settings.DomainContextHandler(
        match_values=settings.DomainContextHandler.MATCH_VALUES_ALL)
    #: Selected variable index
    available_distributions = [d for d in Distribution]
    test_idx = 0
    distribution_idx = 0
    column_idx = 0
    own_distribution_idx = 0

    relative_freq = settings.Setting(False)

    smoothing_index = settings.Setting(5)
    show_prob = settings.ContextSetting(0)

    graph_name = "box_scene"

    ASH_HIST = 50

    bins = [2, 3, 4, 5, 8, 10, 12, 15, 20, 30, 50]

    def __init__(self):
        super().__init__()
        self.varmodel = itemmodels.VariableListModel()
        self.groupvarmodel = []
        self.distributions = [
            distribution.value for distribution in self.available_distributions
        ]
        box = gui.vBox(self.controlArea, 'Tests')
        gui.radioButtonsInBox(
            box,
            self,
            'test_idx',
            btnLabels=[test.name for test in self.available_tests],
            callback=self.test_changed,
        )

        box = gui.vBox(self.controlArea, 'Distributions')
        self.distribution_choose = gui.radioButtonsInBox(
            box,
            self,
            'distribution_idx',
            btnLabels=self.distributions,
            callback=self.distribution_changed,
        )

        self.column_chose = gui.comboBox(
            self.controlArea,
            self,
            'column_idx',
            box='Selected column',
            items=[],
            orientation=Qt.Horizontal,
            callback=self.column_changed,
        )
        self.available_columns = itemmodels.VariableListModel(parent=self)
        self.column_chose.setModel(self.available_columns)
        self.infolabel = gui.widgetLabel(box, "<center>p-value: </center>")
        self.mainArea.setMinimumWidth(800)
        self.own_distribution_choose = gui.comboBox(
            self.controlArea,
            self,
            'own_distribution_idx',
            box='Own distribution',
            items=[],
            orientation=Qt.Horizontal,
            callback=self.column_changed,
        )

        self.own_distribution_choose.setModel(self.available_columns)
        self.data = None

        self.plotview = pg.PlotWidget(background=None)
        self.plotview.setRenderHint(QPainter.Antialiasing)
        self.mainArea.layout().addWidget(self.plotview)
        w = QLabel()
        w.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
        self.mainArea.layout().addWidget(w, Qt.AlignCenter)
        self.ploti = pg.PlotItem()
        self.box_scene = self.ploti.vb
        self.ploti.hideButtons()
        self.plotview.setCentralItem(self.ploti)

        self.plot_prob = pg.ViewBox()
        self.ploti.scene().addItem(self.plot_prob)
        self.ploti.getAxis("right").linkToView(self.plot_prob)
        self.ploti.getAxis("right").setLabel("Probability")
        self.plot_prob.setZValue(10)
        self.plot_prob.setXLink(self.ploti)
        self.update_views()
        self.ploti.vb.sigResized.connect(self.update_views)
        self.plot_prob.setRange(yRange=[0, 1])

        def disable_mouse(box_scene):
            box_scene.setMouseEnabled(False, False)
            box_scene.setMenuEnabled(False)

        disable_mouse(self.box_scene)
        disable_mouse(self.plot_prob)

        self.tooltip_items = []

        pen = QPen(self.palette().color(QPalette.Text))
        for axis in ("left", "bottom"):
            self.ploti.getAxis(axis).setPen(pen)

        self._legend = LegendItem()
        self._legend.setParentItem(self.box_scene)
        self._legend.hide()
        self._legend.anchor((1, 0), (1, 0))
        self.test_changed()

    def update_views(self):
        """
        resize
        """
        self.plot_prob.setGeometry(self.box_scene.sceneBoundingRect())
        self.plot_prob.linkedViewChanged(self.box_scene, self.plot_prob.XAxis)

    def set_data(self, data):
        self.closeContext()
        self.clear()
        self.warning()
        self.data = data
        self.own_distribution_choose.hide()
        if data is not None:
            self.available_columns[:] = data.domain
            domain = self.data.domain

            self.varmodel[:] = list(domain) + [
                meta for meta in domain.metas
                if meta.is_continuous or meta.is_discrete
            ]
            self.groupvarmodel = \
                ["(None)"] + [var for var in domain if var.is_discrete] + \
                [meta for meta in domain.metas if meta.is_discrete]
            if domain.has_discrete_class:
                self.groupvar_idx = \
                    self.groupvarmodel[1:].index(domain.class_var) + 1
            self.openContext(domain)
            self.column_idx = min(max(self.column_idx, 0),
                                  len(self.varmodel) - 1)
            self.groupvar_idx = min(max(self.groupvar_idx, 0),
                                    len(self.groupvarmodel) - 1)
            self._setup()

    def test_changed(self):
        """
        Management of buttons, depends from type of distribution test.
        Own samples are hidden.
        :return:
        """
        for idx, button in enumerate(self.distribution_choose.buttons):
            if Distribution(button.text()) in self.test.allowed_distribution:
                button.show()

                if self.distribution not in self.test.allowed_distribution:
                    button.toggle()
                    self.distribution_idx = idx
            else:
                self.own_distribution_choose.hide()
                button.hide()
        self.compute_p_value()

    def distribution_changed(self):
        """
         Control buttons of allowed distributions - show or hide
        :return: compute p-value
        """
        if self.distribution == Distribution.OWN:
            self.own_distribution_choose.show()
        else:
            self.own_distribution_choose.hide()
        self.compute_p_value()

    def clear(self):
        self.box_scene.clear()
        self.plot_prob.clear()
        self.varmodel[:] = []
        self.groupvarmodel = []
        self.column_idx = -1
        self.groupvar_idx = 0
        self._legend.clear()
        self._legend.hide()

    def column_changed(self):
        """
        compute p-value if column is changed
        """
        self._setup()
        self.compute_p_value()

    def _setup(self):
        """
        set new plot
        """
        self.box_scene.clear()
        self.plot_prob.clear()
        self._legend.clear()
        self._legend.hide()

        varidx = self.column_idx
        self.var = self.cvar = None
        if varidx >= 0:
            self.var = self.varmodel[varidx]

        data = self.data
        if self.var is None:
            return
        self.set_left_axis_name()
        if self.cvar:

            self.contingencies = \
                contingency.get_contingency(data, self.var, self.cvar)
            self.display_contingency()
        else:

            self.distributions = \
                distribution.get_distribution(data, self.var)
            self.display_distribution()
        self.box_scene.autoRange()

    def compute_p_value(self):
        """
        :return: p-value
        """
        if self.data is not None:
            p_value = self.test.compute(self)
            if isinstance(p_value, float):
                self.infolabel.setText('\np-value: {}'.format(p_value))
            else:

                self.infolabel.setText('\np-value: {}'.format(round(
                    p_value, 3)))

    def _on_relative_freq_changed(self):
        self.set_left_axis_name()
        if self.cvar and self.cvar.is_discrete:
            self.display_contingency()
        else:
            self.display_distribution()
        self.box_scene.autoRange()

    def display_contingency(self):
        """
        Set the contingency to display.
        """
        cont = self.contingencies
        var, cvar = self.var, self.cvar
        assert len(cont) > 0
        self.box_scene.clear()
        self.plot_prob.clear()
        self._legend.clear()
        self.tooltip_items = []

        if self.show_prob:
            self.ploti.showAxis('right')
        else:
            self.ploti.hideAxis('right')

        bottomaxis = self.ploti.getAxis("bottom")
        bottomaxis.setLabel(var.name)
        bottomaxis.resizeEvent()

        cvar_values = cvar.values
        colors = [QColor(*col) for col in cvar.colors]

        if var and var.is_continuous:

            bottomaxis.setTicks(None)

            weights, cols, cvar_values, curves = [], [], [], []
            for i, dist in enumerate(cont):
                v, W = dist
                if len(v):
                    weights.append(np.sum(W))
                    cols.append(colors[i])
                    cvar_values.append(cvar.values[i])
                    curves.append(
                        ash_curve(dist, cont, m=DistributionTest.ASH_HIST))
            weights = np.array(weights)
            sumw = np.sum(weights)
            weights /= sumw
            colors = cols
            curves = [(X, Y * w) for (X, Y), w in zip(curves, weights)]

            curvesline = []  # from histograms to lines
            for (X, Y) in curves:
                X += np.array(((X[1] - X[0]) / 2)[:-1])
                Y = np.array(Y)
                curvesline.append((X, Y))

            for t in ["fill", "line"]:
                for (X, Y), color, w, cval in reversed(
                        list(zip(curvesline, colors, weights, cvar_values))):
                    item = pg.PlotCurveItem()
                    pen = QPen(QBrush(color), 3)
                    pen.setCosmetic(True)
                    color = QColor(color)
                    color.setAlphaF(0.2)
                    item.setData(X,
                                 Y / (w if self.relative_freq else 1),
                                 antialias=True,
                                 stepMode=False,
                                 fillLevel=0 if t == "fill" else None,
                                 brush=QBrush(color),
                                 pen=pen)
                    self.box_scene.addItem(item)
                    if t == "line":
                        if self.relative_freq:
                            density = "Normalized density"
                        else:
                            density = "Density"
                        item.tooltip = "{density}\n{name}={value}".format(
                            value=cval, name=cvar.name, density=density)
                        self.tooltip_items.append((self.box_scene, item))

            if self.show_prob:
                all_X = np.array(
                    np.unique(np.hstack([X for X, _ in curvesline])))
                inter_X = np.array(
                    np.linspace(all_X[0], all_X[-1],
                                len(all_X) * 2))
                curvesinterp = [
                    np.interp(inter_X, X, Y) for (X, Y) in curvesline
                ]
                sumprob = np.sum(curvesinterp, axis=0)
                legal = sumprob > 0.05 * np.max(sumprob)

                i = len(curvesinterp) + 1
                show_all = self.show_prob == i
                for Y, color, cval in reversed(
                        list(zip(curvesinterp, colors, cvar_values))):
                    i -= 1
                    if show_all or self.show_prob == i:
                        item = pg.PlotCurveItem()
                        pen = QPen(QBrush(color), 3, style=Qt.DotLine)
                        pen.setCosmetic(True)
                        prob = Y[legal] / sumprob[legal]
                        item.setData(inter_X[legal],
                                     prob,
                                     antialias=True,
                                     stepMode=False,
                                     fillLevel=None,
                                     brush=None,
                                     pen=pen)
                        self.plot_prob.addItem(item)
                        item.tooltip = \
                            "Probability that \n{name}={value}".format(
                                name=cvar.name, value=cval)
                        self.tooltip_items.append((self.plot_prob, item))

        elif var and var.is_discrete:

            bottomaxis.setTicks([list(enumerate(var.values))])

            cont = np.array(cont)

            maxh = 0  # maximal column height
            maxrh = 0  # maximal relative column height
            scvar = cont.sum(axis=1)
            # a cvar with sum=0 with allways have distribution counts 0,
            # therefore we can divide it by anything
            scvar[scvar == 0] = 1
            for i, (value, dist) in enumerate(zip(var.values, cont.T)):
                maxh = max(maxh, max(dist))
                maxrh = max(maxrh, max(dist / scvar))

            for i, (value, dist) in enumerate(zip(var.values, cont.T)):
                dsum = sum(dist)
                geom = QRectF(i - 0.333, 0, 0.666,
                              maxrh if self.relative_freq else maxh)
                item = DistributionBarItem(
                    geom, dist / scvar /
                    maxrh if self.relative_freq else dist / maxh, colors)
                self.box_scene.addItem(item)
                tooltip = "\n".join("%s: %.*f" %
                                    (n, 3 if self.relative_freq else 1, v)
                                    for n, v in zip(
                                        cvar_values, dist /
                                        scvar if self.relative_freq else dist))
                if self.relative_freq:
                    frequency = "Normalized frequency"
                else:
                    frequency = "Frequency"
                item.tooltip = \
                    "{frequency}({name}={value}):\n{tooltip}".format(
                        frequency=frequency,
                        name=cvar.name,
                        value=value,
                        tooltip=tooltip,
                    )
                self.tooltip_items.append((self.box_scene, item))

                if self.show_prob:
                    item.tooltip += "\n\nProbabilities:"
                    for ic, a in enumerate(dist):
                        if (self.show_prob - 1 != ic
                            and self.show_prob - 1 != len(dist)) \
                                or dsum < 1e-6:
                            continue
                        position = -0.333 + ((ic + 0.5) * 0.666 / len(dist))
                        prob = a / dsum
                        if not 1e-6 < prob < 1 - 1e-6:
                            continue
                        ci = 1.96 * sqrt(prob * (1 - prob) / dsum)
                        item.tooltip += "\n%s: %.3f ± %.3f" % (cvar_values[ic],
                                                               prob, ci)
                        mark = pg.ScatterPlotItem()
                        bar = pg.ErrorBarItem()
                        pen = QPen(QBrush(QColor(0)), 1)
                        pen.setCosmetic(True)
                        bar.setData(x=[i + position],
                                    y=[prob],
                                    bottom=min(np.array([ci]), prob),
                                    top=min(np.array([ci]), 1 - prob),
                                    beam=np.array([0.05]),
                                    brush=QColor(1),
                                    pen=pen)
                        mark.setData([i + position], [prob],
                                     antialias=True,
                                     symbol="o",
                                     fillLevel=None,
                                     pxMode=True,
                                     size=10,
                                     brush=QColor(colors[ic]),
                                     pen=pen)
                        self.plot_prob.addItem(bar)
                        self.plot_prob.addItem(mark)

        for color, name in zip(colors, cvar_values):
            self._legend.addItem(
                ScatterPlotItem(pen=color, brush=color, size=10, shape="s"),
                escape(name))
        self._legend.show()

    def set_left_axis_name(self):
        leftaxis = self.ploti.getAxis("left")
        set_label = leftaxis.setLabel
        if self.var and self.var.is_continuous:

            set_label(["Density", "Relative density"][self.cvar is not None
                                                      and self.relative_freq])
        else:

            set_label(["Frequency",
                       "Relative frequency"][self.cvar is not None
                                             and self.relative_freq])
        leftaxis.resizeEvent()

    def display_distribution(self):
        dist = self.distributions
        var = self.var
        assert len(dist) > 0
        self.box_scene.clear()
        self.plot_prob.clear()
        self.ploti.hideAxis('right')
        self.tooltip_items = []

        bottomaxis = self.ploti.getAxis("bottom")
        bottomaxis.setLabel(var.name)
        bottomaxis.resizeEvent()

        self.set_left_axis_name()
        if var and var.is_continuous:

            bottomaxis.setTicks(None)
            if not len(dist[0]):
                return
            edges, curve = ash_curve(dist, None, m=DistributionTest.ASH_HIST)
            edges = edges + (edges[1] - edges[0]) / 2
            edges = edges[:-1]
            item = pg.PlotCurveItem()
            pen = QPen(QBrush(Qt.black), 3)
            pen.setCosmetic(True)
            item.setData(edges,
                         curve,
                         antialias=True,
                         stepMode=False,
                         fillLevel=0,
                         brush=QBrush(Qt.gray),
                         pen=pen)
            self.box_scene.addItem(item)
            item.tooltip = "Density"
            self.tooltip_items.append((self.box_scene, item))
        else:

            bottomaxis.setTicks([list(enumerate(var.values))])
            for i, w in enumerate(dist):
                geom = QRectF(i - 0.33, 0, 0.66, w)
                item = DistributionBarItem(geom, [1.0],
                                           [QColor(128, 128, 128)])
                self.box_scene.addItem(item)
                item.tooltip = "Frequency for %s: %r" % (var.values[i], w)
                self.tooltip_items.append((self.box_scene, item))

    def onDeleteWidget(self):
        self.box_scene.clear()
        super().onDeleteWidget()

    def get_widget_name_extension(self):
        if self.column_idx >= 0:
            return self.varmodel[self.column_idx]

    @property
    def test(self) -> Test:
        return self.available_tests[self.test_idx]

    @property
    def distribution(self) -> Distribution:
        return self.available_distributions[self.distribution_idx]

    @property
    def column(self):
        return self.data[:, self.column_idx]

    @property
    def own_distribution(self):
        return self.data[:, self.own_distribution_idx]
示例#4
0
class OWDistributions(widget.OWWidget):
    name = "Distributions"
    description = "Display value distributions of a data feature in a graph."
    icon = "icons/Distribution.svg"
    priority = 100
    inputs = [
        InputSignal("Data",
                    Orange.data.Table,
                    "set_data",
                    doc="Set the input data set")
    ]

    settingsHandler = settings.DomainContextHandler(
        match_values=settings.DomainContextHandler.MATCH_VALUES_ALL)
    #: Selected variable index
    variable_idx = settings.ContextSetting(-1)
    #: Selected group variable
    groupvar_idx = settings.ContextSetting(0)

    relative_freq = settings.Setting(False)
    disc_cont = settings.Setting(False)

    smoothing_index = settings.Setting(5)
    show_prob = settings.ContextSetting(0)

    graph_name = "plot"

    ASH_HIST = 50

    bins = [2, 3, 4, 5, 8, 10, 12, 15, 20, 30, 50]
    smoothing_facs = list(
        reversed([0.1, 0.2, 0.4, 0.6, 0.8, 1, 1.5, 2, 4, 6, 10]))

    def __init__(self):
        super().__init__()
        self.data = None

        self.distributions = None
        self.contingencies = None
        self.var = self.cvar = None
        varbox = gui.widgetBox(self.controlArea, "Variable")

        self.varmodel = itemmodels.VariableListModel()
        self.groupvarmodel = []

        self.varview = QtGui.QListView(
            selectionMode=QtGui.QListView.SingleSelection)
        self.varview.setSizePolicy(QtGui.QSizePolicy.Minimum,
                                   QtGui.QSizePolicy.Expanding)
        self.varview.setModel(self.varmodel)
        self.varview.setSelectionModel(
            itemmodels.ListSingleSelectionModel(self.varmodel))
        self.varview.selectionModel().selectionChanged.connect(
            self._on_variable_idx_changed)
        varbox.layout().addWidget(self.varview)

        box = gui.widgetBox(self.controlArea, "Precision")

        gui.separator(self.controlArea, 4, 4)

        box2 = gui.widgetBox(box, orientation="horizontal")
        self.l_smoothing_l = gui.widgetLabel(box2, "Smooth")
        gui.hSlider(box2,
                    self,
                    "smoothing_index",
                    minValue=0,
                    maxValue=len(self.smoothing_facs) - 1,
                    callback=self._on_set_smoothing,
                    createLabel=False)
        self.l_smoothing_r = gui.widgetLabel(box2, "Precise")

        self.cb_disc_cont = gui.checkBox(
            gui.indentedBox(box, sep=4),
            self,
            "disc_cont",
            "Bin continuous variables",
            callback=self._on_groupvar_idx_changed,
            tooltip="Show continuous variables as discrete.")

        box = gui.widgetBox(self.controlArea, "Group by")
        self.icons = gui.attributeIconDict
        self.groupvarview = gui.comboBox(
            box,
            self,
            "groupvar_idx",
            callback=self._on_groupvar_idx_changed,
            valueType=str,
            contentsLength=12)
        box2 = gui.indentedBox(box, sep=4)
        self.cb_rel_freq = gui.checkBox(
            box2,
            self,
            "relative_freq",
            "Show relative frequencies",
            callback=self._on_relative_freq_changed,
            tooltip=
            "Normalize probabilities so that probabilities for each group-by value sum to 1."
        )
        gui.separator(box2)
        self.cb_prob = gui.comboBox(
            box2,
            self,
            "show_prob",
            label="Show probabilities",
            orientation="horizontal",
            callback=self._on_relative_freq_changed,
            tooltip=
            "Show probabilities for a chosen group-by value (at each point probabilities for all group-by values sum to 1)."
        )

        self.plotview = pg.PlotWidget(background=None)
        self.plotview.setRenderHint(QtGui.QPainter.Antialiasing)
        self.mainArea.layout().addWidget(self.plotview)
        w = QtGui.QLabel()
        w.setSizePolicy(QtGui.QSizePolicy.Expanding, QtGui.QSizePolicy.Fixed)
        self.mainArea.layout().addWidget(w, Qt.AlignCenter)
        self.ploti = pg.PlotItem()
        self.plot = self.ploti.vb
        self.ploti.hideButtons()
        self.plotview.setCentralItem(self.ploti)

        self.plot_prob = pg.ViewBox()
        self.ploti.hideAxis('right')
        self.ploti.scene().addItem(self.plot_prob)
        self.ploti.getAxis("right").linkToView(self.plot_prob)
        self.ploti.getAxis("right").setLabel("Probability")
        self.plot_prob.setZValue(10)
        self.plot_prob.setXLink(self.ploti)
        self.update_views()
        self.ploti.vb.sigResized.connect(self.update_views)
        self.plot_prob.setRange(yRange=[0, 1])

        self.inline_graph_report()

        def disable_mouse(plot):
            plot.setMouseEnabled(False, False)
            plot.setMenuEnabled(False)

        disable_mouse(self.plot)
        disable_mouse(self.plot_prob)

        self.tooltip_items = []
        self.plot.scene().installEventFilter(
            HelpEventDelegate(self.help_event, self))

        pen = QtGui.QPen(self.palette().color(QtGui.QPalette.Text))
        for axis in ("left", "bottom"):
            self.ploti.getAxis(axis).setPen(pen)

        self._legend = LegendItem()
        self._legend.setParentItem(self.plot)
        self._legend.hide()
        self._legend.anchor((1, 0), (1, 0))

    def update_views(self):
        self.plot_prob.setGeometry(self.plot.sceneBoundingRect())
        self.plot_prob.linkedViewChanged(self.plot, self.plot_prob.XAxis)

    def set_data(self, data):
        self.closeContext()
        self.clear()
        self.data = data
        if self.data is not None:
            domain = self.data.domain
            self.varmodel[:] = list(domain)

            self.groupvarview.clear()
            self.groupvarmodel = \
                ["(None)"] + [var for var in domain if var.is_discrete]
            self.groupvarview.addItem("(None)")
            for var in self.groupvarmodel[1:]:
                self.groupvarview.addItem(self.icons[var], var.name)
            if domain.has_discrete_class:
                self.groupvar_idx = \
                    self.groupvarmodel[1:].index(domain.class_var) + 1
            self.openContext(domain)
            self.variable_idx = min(max(self.variable_idx, 0),
                                    len(self.varmodel) - 1)
            self.groupvar_idx = min(max(self.groupvar_idx, 0),
                                    len(self.groupvarmodel) - 1)
            itemmodels.select_row(self.varview, self.variable_idx)
            self._setup()

    def clear(self):
        self.plot.clear()
        self.plot_prob.clear()
        self.varmodel[:] = []
        self.groupvarmodel = []
        self.variable_idx = -1
        self.groupvar_idx = 0
        self._legend.clear()
        self._legend.hide()

    def _setup_smoothing(self):
        if not self.disc_cont and self.var and self.var.is_continuous:
            self.cb_disc_cont.setText("Bin continuous variables")
            self.l_smoothing_l.setText("Smooth")
            self.l_smoothing_r.setText("Precise")
        else:
            self.cb_disc_cont.setText(
                "Bin continuous variables into {} bins".format(
                    self.bins[self.smoothing_index]))
            self.l_smoothing_l.setText(" " + str(self.bins[0]))
            self.l_smoothing_r.setText(" " + str(self.bins[-1]))

    def _setup(self):
        self.plot.clear()
        self.plot_prob.clear()
        self._legend.clear()
        self._legend.hide()

        varidx = self.variable_idx
        self.var = self.cvar = None
        if varidx >= 0:
            self.var = self.varmodel[varidx]
        if self.groupvar_idx > 0:
            self.cvar = self.groupvarmodel[self.groupvar_idx]
            self.cb_prob.clear()
            self.cb_prob.addItem("(None)")
            self.cb_prob.addItems(self.cvar.values)
            self.cb_prob.addItem("(All)")
            self.show_prob = min(max(self.show_prob, 0),
                                 len(self.cvar.values) + 1)
        data = self.data
        self._setup_smoothing()
        if self.var is None:
            return
        if self.disc_cont:
            data = self.data[:,
                             (self.var, self.cvar) if self.cvar else self.var]
            disc = Orange.preprocess.discretize.EqualWidth(
                n=self.bins[self.smoothing_index])
            data = Orange.preprocess.Discretize(data,
                                                method=disc,
                                                remove_const=False)
            self.var = data.domain[0]
        self.set_left_axis_name()
        self.enable_disable_rel_freq()
        if self.cvar:
            self.contingencies = \
                contingency.get_contingency(data, self.var, self.cvar)
            self.display_contingency()
        else:
            self.distributions = \
                distribution.get_distribution(data, self.var)
            self.display_distribution()
        self.plot.autoRange()

    def help_event(self, ev):
        in_graph_coor = self.plot.mapSceneToView(ev.scenePos())
        ctooltip = []
        for vb, item in self.tooltip_items:
            if isinstance(item,
                          pg.PlotCurveItem) and item.mouseShape().contains(
                              vb.mapSceneToView(ev.scenePos())):
                ctooltip.append(item.tooltip)
            elif isinstance(
                    item,
                    DistributionBarItem) and item.boundingRect().contains(
                        vb.mapSceneToView(ev.scenePos())):
                ctooltip.append(item.tooltip)
        if ctooltip:
            QToolTip.showText(ev.screenPos(),
                              "\n\n".join(ctooltip),
                              widget=self.plotview)
            return True
        return False

    def display_distribution(self):
        dist = self.distributions
        var = self.var
        assert len(dist) > 0
        self.plot.clear()
        self.plot_prob.clear()
        self.ploti.hideAxis('right')
        self.tooltip_items = []

        bottomaxis = self.ploti.getAxis("bottom")
        bottomaxis.setLabel(var.name)
        bottomaxis.resizeEvent()

        self.set_left_axis_name()
        if var and var.is_continuous:
            bottomaxis.setTicks(None)
            if not len(dist[0]):
                return
            edges, curve = ash_curve(
                dist,
                None,
                m=OWDistributions.ASH_HIST,
                smoothing_factor=self.smoothing_facs[self.smoothing_index])
            edges = edges + (edges[1] - edges[0]) / 2
            edges = edges[:-1]
            item = pg.PlotCurveItem()
            pen = QtGui.QPen(QtGui.QBrush(Qt.white), 3)
            pen.setCosmetic(True)
            item.setData(edges,
                         curve,
                         antialias=True,
                         stepMode=False,
                         fillLevel=0,
                         brush=QtGui.QBrush(Qt.gray),
                         pen=pen)
            self.plot.addItem(item)
            item.tooltip = "Density"
            self.tooltip_items.append((self.plot, item))
        else:
            bottomaxis.setTicks([list(enumerate(var.values))])
            for i, w in enumerate(dist):
                geom = QtCore.QRectF(i - 0.33, 0, 0.66, w)
                item = DistributionBarItem(geom, [1.0],
                                           [QtGui.QColor(128, 128, 128)])
                self.plot.addItem(item)
                item.tooltip = "Frequency for %s: %r" % (var.values[i], w)
                self.tooltip_items.append((self.plot, item))

    def _on_relative_freq_changed(self):
        self.set_left_axis_name()
        if self.cvar and self.cvar.is_discrete:
            self.display_contingency()
        else:
            self.display_distribution()
        self.plot.autoRange()

    def display_contingency(self):
        """
        Set the contingency to display.
        """
        cont = self.contingencies
        var, cvar = self.var, self.cvar
        assert len(cont) > 0
        self.plot.clear()
        self.plot_prob.clear()
        self._legend.clear()
        self.tooltip_items = []

        if self.show_prob:
            self.ploti.showAxis('right')
        else:
            self.ploti.hideAxis('right')

        bottomaxis = self.ploti.getAxis("bottom")
        bottomaxis.setLabel(var.name)
        bottomaxis.resizeEvent()

        cvar_values = cvar.values
        colors = [QtGui.QColor(*col) for col in cvar.colors]

        if var and var.is_continuous:
            bottomaxis.setTicks(None)

            weights, cols, cvar_values, curves = [], [], [], []
            for i, dist in enumerate(cont):
                v, W = dist
                if len(v):
                    weights.append(numpy.sum(W))
                    cols.append(colors[i])
                    cvar_values.append(cvar.values[i])
                    curves.append(
                        ash_curve(dist,
                                  cont,
                                  m=OWDistributions.ASH_HIST,
                                  smoothing_factor=self.smoothing_facs[
                                      self.smoothing_index]))
            weights = numpy.array(weights)
            sumw = numpy.sum(weights)
            weights /= sumw
            colors = cols
            curves = [(X, Y * w) for (X, Y), w in zip(curves, weights)]
            ncval = len(cvar_values)

            curvesline = []  #from histograms to lines
            for (X, Y) in curves:
                X = X + (X[1] - X[0]) / 2
                X = X[:-1]
                X = numpy.array(X)
                Y = numpy.array(Y)
                curvesline.append((X, Y))

            for t in ["fill", "line"]:
                for (X, Y), color, w, cval in reversed(
                        list(zip(curvesline, colors, weights, cvar_values))):
                    item = pg.PlotCurveItem()
                    pen = QtGui.QPen(QtGui.QBrush(color), 3)
                    pen.setCosmetic(True)
                    color = QtGui.QColor(color)
                    color.setAlphaF(0.2)
                    item.setData(X,
                                 Y / (w if self.relative_freq else 1),
                                 antialias=True,
                                 stepMode=False,
                                 fillLevel=0 if t == "fill" else None,
                                 brush=QtGui.QBrush(color),
                                 pen=pen)
                    self.plot.addItem(item)
                    if t == "line":
                        item.tooltip = ("Normalized density " if self.relative_freq else "Density ") \
                            + "\n"+ cvar.name + "=" + cval
                        self.tooltip_items.append((self.plot, item))

            if self.show_prob:
                M_EST = 5  #for M estimate
                all_X = numpy.array(
                    numpy.unique(numpy.hstack([X for X, _ in curvesline])))
                inter_X = numpy.array(
                    numpy.linspace(all_X[0], all_X[-1],
                                   len(all_X) * 2))
                curvesinterp = [
                    numpy.interp(inter_X, X, Y) for (X, Y) in curvesline
                ]
                sumprob = numpy.sum(curvesinterp, axis=0)
                # allcorrection = M_EST/sumw*numpy.sum(sumprob)/len(inter_X)
                legal = sumprob > 0.05 * numpy.max(sumprob)

                i = len(curvesinterp) + 1
                show_all = self.show_prob == i
                for Y, color, cval in reversed(
                        list(zip(curvesinterp, colors, cvar_values))):
                    i -= 1
                    if show_all or self.show_prob == i:
                        item = pg.PlotCurveItem()
                        pen = QtGui.QPen(QtGui.QBrush(color),
                                         3,
                                         style=QtCore.Qt.DotLine)
                        pen.setCosmetic(True)
                        #prob = (Y+allcorrection/ncval)/(sumprob+allcorrection)
                        prob = Y[legal] / sumprob[legal]
                        item.setData(inter_X[legal],
                                     prob,
                                     antialias=True,
                                     stepMode=False,
                                     fillLevel=None,
                                     brush=None,
                                     pen=pen)
                        self.plot_prob.addItem(item)
                        item.tooltip = "Probability that \n" + cvar.name + "=" + cval
                        self.tooltip_items.append((self.plot_prob, item))

        elif var and var.is_discrete:
            bottomaxis.setTicks([list(enumerate(var.values))])

            cont = numpy.array(cont)
            ncval = len(cvar_values)

            maxh = 0  #maximal column height
            maxrh = 0  #maximal relative column height
            scvar = cont.sum(axis=1)
            #a cvar with sum=0 with allways have distribution counts 0,
            #therefore we can divide it by anything
            scvar[scvar == 0] = 1
            for i, (value, dist) in enumerate(zip(var.values, cont.T)):
                maxh = max(maxh, max(dist))
                maxrh = max(maxrh, max(dist / scvar))

            for i, (value, dist) in enumerate(zip(var.values, cont.T)):
                dsum = sum(dist)
                geom = QtCore.QRectF(i - 0.333, 0, 0.666,
                                     maxrh if self.relative_freq else maxh)
                if self.show_prob:
                    prob = dist / dsum
                    ci = 1.96 * numpy.sqrt(prob * (1 - prob) / dsum)
                else:
                    ci = None
                item = DistributionBarItem(
                    geom, dist / scvar /
                    maxrh if self.relative_freq else dist / maxh, colors)
                self.plot.addItem(item)
                tooltip = "\n".join("%s: %.*f" %
                                    (n, 3 if self.relative_freq else 1, v)
                                    for n, v in zip(
                                        cvar_values, dist /
                                        scvar if self.relative_freq else dist))
                item.tooltip = ("Normalized frequency " if self.relative_freq else "Frequency ") \
                    + "(" + cvar.name + "=" + value + "):" \
                    + "\n" + tooltip
                self.tooltip_items.append((self.plot, item))

                if self.show_prob:
                    item.tooltip += "\n\nProbabilities:"
                    for ic, a in enumerate(dist):
                        if self.show_prob - 1 != ic and \
                                self.show_prob - 1 != len(dist):
                            continue
                        position = -0.333 + ((ic + 0.5) * 0.666 / len(dist))
                        if dsum < 1e-6:
                            continue
                        prob = a / dsum
                        if not 1e-6 < prob < 1 - 1e-6:
                            continue
                        ci = 1.96 * sqrt(prob * (1 - prob) / dsum)
                        item.tooltip += "\n%s: %.3f ± %.3f" % (cvar_values[ic],
                                                               prob, ci)
                        mark = pg.ScatterPlotItem()
                        bar = pg.ErrorBarItem()
                        pen = QtGui.QPen(QtGui.QBrush(QtGui.QColor(0)), 1)
                        pen.setCosmetic(True)
                        bar.setData(x=[i + position],
                                    y=[prob],
                                    bottom=min(numpy.array([ci]), prob),
                                    top=min(numpy.array([ci]), 1 - prob),
                                    beam=numpy.array([0.05]),
                                    brush=QtGui.QColor(1),
                                    pen=pen)
                        mark.setData([i + position], [prob],
                                     antialias=True,
                                     symbol="o",
                                     fillLevel=None,
                                     pxMode=True,
                                     size=10,
                                     brush=QtGui.QColor(colors[ic]),
                                     pen=pen)
                        self.plot_prob.addItem(bar)
                        self.plot_prob.addItem(mark)

        for color, name in zip(colors, cvar_values):
            self._legend.addItem(
                ScatterPlotItem(pen=color, brush=color, size=10, shape="s"),
                escape(name))
        self._legend.show()

    def set_left_axis_name(self):
        leftaxis = self.ploti.getAxis("left")
        set_label = leftaxis.setLabel
        if self.var and self.var.is_continuous:
            set_label(["Density", "Relative density"][self.cvar is not None
                                                      and self.relative_freq])
        else:
            set_label(["Frequency",
                       "Relative frequency"][self.cvar is not None
                                             and self.relative_freq])
        leftaxis.resizeEvent()

    def enable_disable_rel_freq(self):
        self.cb_prob.setDisabled(self.var is None or self.cvar is None)
        self.cb_rel_freq.setDisabled(self.var is None or self.cvar is None)

    def _on_variable_idx_changed(self):
        self.variable_idx = selected_index(self.varview)
        self._setup()

    def _on_groupvar_idx_changed(self):
        self._setup()

    def _on_set_smoothing(self):
        self._setup()

    def onDeleteWidget(self):
        self.plot.clear()
        super().onDeleteWidget()

    def get_widget_name_extension(self):
        if self.variable_idx >= 0:
            return self.varmodel[self.variable_idx]

    def send_report(self):
        if self.variable_idx < 0:
            return
        self.report_plot()
        text = "Distribution of '{}'".format(self.varmodel[self.variable_idx])
        if self.groupvar_idx:
            group_var = self.groupvarmodel[self.groupvar_idx]
            prob = self.cb_prob
            indiv_probs = 0 < prob.currentIndex() < prob.count() - 1
            if not indiv_probs or self.relative_freq:
                text += " grouped by '{}'".format(group_var)
                if self.relative_freq:
                    text += " (relative frequencies)"
            if indiv_probs:
                text += "; probabilites for '{}={}'".format(
                    group_var, prob.currentText())
        self.report_caption(text)
示例#5
0
class OWDistributions(widget.OWWidget):
    name = "Distributions"
    description = "Display value distributions of a data feature in a graph."
    icon = "icons/Distribution.svg"
    priority = 100
    inputs = [InputSignal("Data", Orange.data.Table, "set_data",
                          doc="Set the input data set")]

    settingsHandler = settings.DomainContextHandler()
    #: Selected variable index
    variable_idx = settings.ContextSetting(-1)
    #: Selected group variable
    groupvar_idx = settings.ContextSetting(0)

    Hist, ASH, Kernel = 0, 1, 2
    #: Continuous variable density estimation method
    cont_est_type = settings.Setting(ASH)
    relative_freq = settings.Setting(False)

    def __init__(self, parent=None):
        super().__init__(parent)
        self.data = None

        self.distributions = None
        self.contingencies = None
        self.var = self.cvar = None
        varbox = gui.widgetBox(self.controlArea, "Variable")

        self.varmodel = itemmodels.VariableListModel()
        self.groupvarmodel = itemmodels.VariableListModel()

        self.varview = QtGui.QListView(
            selectionMode=QtGui.QListView.SingleSelection)
        self.varview.setSizePolicy(
            QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Expanding)
        self.varview.setModel(self.varmodel)
        self.varview.setSelectionModel(
            itemmodels.ListSingleSelectionModel(self.varmodel))
        self.varview.selectionModel().selectionChanged.connect(
            self._on_variable_idx_changed)
        varbox.layout().addWidget(self.varview)
        gui.separator(varbox, 8, 8)
        gui.comboBox(
            varbox, self, "cont_est_type", label="Show continuous variables by",
            valueType=int,
            items=["Histograms", "Average shifted histograms",
                   "Kernel density estimators"],
            callback=self._on_cont_est_type_changed)

        box = gui.widgetBox(self.controlArea, "Group by")
        self.groupvarview = QtGui.QListView(
            selectionMode=QtGui.QListView.SingleSelection)
        self.groupvarview.setFixedHeight(100)
        self.groupvarview.setSizePolicy(
            QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Preferred)
        self.groupvarview.setModel(self.groupvarmodel)
        self.groupvarview.selectionModel().selectionChanged.connect(
            self._on_groupvar_idx_changed)
        box.layout().addWidget(self.groupvarview)
        self.cb_rel_freq = gui.checkBox(
            box, self, "relative_freq", "Show relative frequencies",
            callback=self._on_relative_freq_changed)

        plotview = pg.PlotWidget(background=None)
        self.mainArea.layout().addWidget(plotview)
        w = QtGui.QLabel()
        w.setSizePolicy(QtGui.QSizePolicy.Expanding, QtGui.QSizePolicy.Fixed)
        self.mainArea.layout().addWidget(w, Qt.AlignCenter)

        self.plot = pg.PlotItem()
#         self.plot.getViewBox().setMouseEnabled(False, False)
        self.plot.getViewBox().setMenuEnabled(False)
        plotview.setCentralItem(self.plot)

        pen = QtGui.QPen(self.palette().color(QtGui.QPalette.Text))
        for axis in ("left", "bottom"):
            self.plot.getAxis(axis).setPen(pen)

        self._legend = LegendItem()
        self._legend.setParentItem(self.plot.getViewBox())
        self._legend.hide()
        self._legend.anchor((1, 0), (1, 0))

    def set_data(self, data):
        self.closeContext()
        self.clear()
        self.data = data
        if self.data is not None:
            domain = self.data.domain
            self.varmodel[:] = list(domain)
            self.groupvarmodel[:] = \
                ["(None)"] + [var for var in domain if var.is_discrete]
            if domain.has_discrete_class:
                self.groupvar_idx = \
                    list(self.groupvarmodel).index(domain.class_var)
            self.openContext(domain)
            self.variable_idx = min(max(self.variable_idx, 0),
                                    len(self.varmodel) - 1)
            self.groupvar_idx = min(max(self.groupvar_idx, 0),
                                    len(self.groupvarmodel) - 1)
            itemmodels.select_row(self.groupvarview, self.groupvar_idx)
            itemmodels.select_row(self.varview, self.variable_idx)
            self._setup()

    def clear(self):
        self.plot.clear()
        self.varmodel[:] = []
        self.groupvarmodel[:] = []
        self.variable_idx = -1
        self.groupvar_idx = 0
        self._legend.clear()
        self._legend.hide()

    def _setup(self):
        self.plot.clear()
        self._legend.clear()
        self._legend.hide()

        varidx = self.variable_idx
        self.var = self.cvar = None
        if varidx >= 0:
            self.var = self.varmodel[varidx]
        if self.groupvar_idx > 0:
            self.cvar = self.groupvarmodel[self.groupvar_idx]
        self.set_left_axis_name()
        self.enable_disable_rel_freq()
        if self.var is None:
            return
        if self.cvar:
            self.contingencies = \
                contingency.get_contingency(self.data, self.var, self.cvar)
            self.display_contingency()
        else:
            self.distributions = \
                distribution.get_distribution(self.data, self.var)
            self.display_distribution()

    def _density_estimator(self):
        if self.cont_est_type == OWDistributions.Hist:
            def hist(dist):
                h, edges = numpy.histogram(dist[0, :], bins=10,
                                           weights=dist[1, :])
                return edges, h
            return hist
        elif self.cont_est_type == OWDistributions.ASH:
            return lambda dist: ash_curve(dist, m=5)
        elif self.cont_est_type == OWDistributions.Kernel:
            return rect_kernel_curve

    def display_distribution(self):
        dist = self.distributions
        var = self.var
        assert len(dist) > 0
        self.plot.clear()

        bottomaxis = self.plot.getAxis("bottom")
        bottomaxis.setLabel(var.name)

        self.set_left_axis_name()
        if var and var.is_continuous:
            bottomaxis.setTicks(None)
            curve_est = self._density_estimator()
            edges, curve = curve_est(dist)
            item = pg.PlotCurveItem()
            item.setData(edges, curve, antialias=True, stepMode=True,
                         fillLevel=0, brush=QtGui.QBrush(Qt.gray),
                         pen=QtGui.QColor(Qt.white))
            self.plot.addItem(item)
        else:
            bottomaxis.setTicks([list(enumerate(var.values))])
            for i, w in enumerate(dist):
                geom = QtCore.QRectF(i - 0.33, 0, 0.66, w)
                item = DistributionBarItem(geom, [1.0],
                                           [QtGui.QColor(128, 128, 128)])
                self.plot.addItem(item)

    def _on_relative_freq_changed(self):
        self.set_left_axis_name()
        if self.cvar and self.cvar.is_discrete:
            self.display_contingency()
        else:
            self.display_distribution()

    def display_contingency(self):
        """
        Set the contingency to display.
        """
        cont = self.contingencies
        var, cvar = self.var, self.cvar
        assert len(cont) > 0
        self.plot.clear()
        self._legend.clear()

        bottomaxis = self.plot.getAxis("bottom")
        bottomaxis.setLabel(var.name)

        palette = colorpalette.ColorPaletteGenerator(len(cvar.values))
        colors = [palette[i] for i in range(len(cvar.values))]

        if var and var.is_continuous:
            bottomaxis.setTicks(None)

            weights = numpy.array([numpy.sum(W) for _, W in cont])
            weights /= numpy.sum(weights)

            curve_est = self._density_estimator()
            curves = [curve_est(dist) for dist in cont]
            curves = [(X, Y * w) for (X, Y), w in zip(curves, weights)]

            cum_curves = [curves[0]]
            for X, Y in curves[1:]:
                cum_curves.append(sum_rect_curve(X, Y, *cum_curves[-1]))

            for (X, Y), color in reversed(list(zip(cum_curves, colors))):
                item = pg.PlotCurveItem()
                pen = QtGui.QPen(QtGui.QBrush(Qt.white), 0.5)
                pen.setCosmetic(True)
                item.setData(X, Y, antialias=True, stepMode=True,
                             fillLevel=0, brush=QtGui.QBrush(color.lighter()),
                             pen=pen)
                self.plot.addItem(item)

#             # XXX: sum the individual curves and not the distributions.
#             # The conditional distributions might be 'smoother' than
#             # the cumulative one
#             cum_dist = [cont[0]]
#             for dist in cont[1:]:
#                 cum_dist.append(dist_sum(dist, cum_dist[-1]))
#
#             curves = [rect_kernel_curve(dist) for dist in cum_dist]
#             colors = [Qt.blue, Qt.red, Qt.magenta]
#             for (X, Y), color in reversed(list(zip(curves, colors))):
#                 item = pg.PlotCurveItem()
#                 item.setData(X, Y, antialias=True, stepMode=True,
#                              fillLevel=0, brush=QtGui.QBrush(color))
#                 item.setPen(QtGui.QPen(color))
#                 self.plot.addItem(item)
        elif var and var.is_discrete:
            bottomaxis.setTicks([list(enumerate(var.values))])

            cont = numpy.array(cont)
            for i, (value, dist) in enumerate(zip(var.values, cont.T)):
                dsum = sum(dist)
                geom = QtCore.QRectF(i - 0.333, 0, 0.666, 100
                                     if self.relative_freq else dsum)
                item = DistributionBarItem(geom, dist / dsum, colors)
                self.plot.addItem(item)

        for color, name in zip(colors, cvar.values):
            self._legend.addItem(
                ScatterPlotItem(pen=color, brush=color, size=10, shape="s"),
                name
            )
        self._legend.show()

    def set_left_axis_name(self):
        set_label = self.plot.getAxis("left").setLabel
        if (self.var and
            self.var.is_continuous and
            self.cont_est_type != OWDistributions.Hist):
            set_label("Density")
        else:
            set_label(["Frequency", "Relative frequency"]
                      [self.cvar is not None and self.relative_freq])

    def enable_disable_rel_freq(self):
        self.cb_rel_freq.setDisabled(
            self.var is None or self.cvar is None or self.var.is_continuous)

    def _on_variable_idx_changed(self):
        self.variable_idx = selected_index(self.varview)
        self._setup()

    def _on_groupvar_idx_changed(self):
        self.groupvar_idx = selected_index(self.groupvarview)
        self._setup()

    def _on_cont_est_type_changed(self):
        self.set_left_axis_name()
        if self.data is not None:
            self._setup()

    def onDeleteWidget(self):
        self.plot.clear()
        super().onDeleteWidget()