Exemplo n.º 1
0
class OWPythagoreanForest(OWWidget):
    name = 'Pythagorean Forest'
    description = 'Pythagorean forest for visualising random forests.'
    icon = 'icons/PythagoreanForest.svg'

    priority = 1001

    inputs = [('Random forest', RandomForestModel, 'set_rf')]
    outputs = [('Tree', TreeModel)]

    # Enable the save as feature
    graph_name = 'scene'

    # Settings
    depth_limit = settings.ContextSetting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    zoom = settings.Setting(50)
    selected_tree_index = settings.ContextSetting(-1)

    def __init__(self):
        super().__init__()
        self.model = None
        self.forest_adapter = None
        self.instances = None
        self.clf_dataset = None
        # We need to store refernces to the trees and grid items
        self.grid_items, self.ptrees = [], []
        # In some rare cases, we need to prevent commiting, the only one
        # that this currently helps is that when changing the size calculation
        # the trees are all recomputed, but we don't want to output a new tree
        # to keep things consistent with other ui controls.
        self.__prevent_commit = False

        self.color_palette = None

        # Different methods to calculate the size of squares
        self.SIZE_CALCULATION = [
            ('Normal', lambda x: x),
            ('Square root', lambda x: sqrt(x)),
            ('Logarithmic', lambda x: log(x + 1)),
        ]

        # CONTROL AREA
        # Tree info area
        box_info = gui.widgetBox(self.controlArea, 'Forest')
        self.ui_info = gui.widgetLabel(box_info)

        # Display controls area
        box_display = gui.widgetBox(self.controlArea, 'Display')
        self.ui_depth_slider = gui.hSlider(box_display,
                                           self,
                                           'depth_limit',
                                           label='Depth',
                                           ticks=False,
                                           callback=self.update_depth)
        self.ui_target_class_combo = gui.comboBox(box_display,
                                                  self,
                                                  'target_class_index',
                                                  label='Target class',
                                                  orientation=Qt.Horizontal,
                                                  items=[],
                                                  contentsLength=8,
                                                  callback=self.update_colors)
        self.ui_size_calc_combo = gui.comboBox(
            box_display,
            self,
            'size_calc_idx',
            label='Size',
            orientation=Qt.Horizontal,
            items=list(zip(*self.SIZE_CALCULATION))[0],
            contentsLength=8,
            callback=self.update_size_calc)
        self.ui_zoom_slider = gui.hSlider(box_display,
                                          self,
                                          'zoom',
                                          label='Zoom',
                                          ticks=False,
                                          minValue=20,
                                          maxValue=150,
                                          callback=self.zoom_changed,
                                          createLabel=False)

        # Stretch to fit the rest of the unsused area
        gui.rubber(self.controlArea)

        self.controlArea.setSizePolicy(QSizePolicy.Preferred,
                                       QSizePolicy.Expanding)

        # MAIN AREA
        self.scene = QGraphicsScene(self)
        self.scene.selectionChanged.connect(self.commit)
        self.grid = OWGrid()
        self.grid.geometryChanged.connect(self._update_scene_rect)
        self.scene.addItem(self.grid)

        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOn)
        self.mainArea.layout().addWidget(self.view)

        self.resize(800, 500)

        self.clear()

    def set_rf(self, model=None):
        """When a different forest is given."""
        self.clear()
        self.model = model

        if model is not None:
            self.forest_adapter = self._get_forest_adapter(self.model)
            self._draw_trees()
            self.color_palette = self.forest_adapter.get_trees()[0]

            self.instances = model.instances
            # this bit is important for the regression classifier
            if self.instances is not None and self.instances.domain != model.domain:
                self.clf_dataset = Table.from_table(self.model.domain,
                                                    self.instances)
            else:
                self.clf_dataset = self.instances

            self._update_info_box()
            self._update_target_class_combo()
            self._update_depth_slider()

            self.selected_tree_index = -1

    def clear(self):
        """Clear all relevant data from the widget."""
        self.model = None
        self.forest_adapter = None
        self.ptrees = []
        self.grid_items = []
        self.grid.clear()

        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()

    def update_depth(self):
        """When the max depth slider is changed."""
        for tree in self.ptrees:
            tree.set_depth_limit(self.depth_limit)

    def update_colors(self):
        """When the target class or coloring method is changed."""
        for tree in self.ptrees:
            tree.target_class_changed(self.target_class_index)

    def update_size_calc(self):
        """When the size calculation of the trees is changed."""
        if self.model is not None:
            with self._prevent_commit():
                self.grid.clear()
                self._draw_trees()
                # Keep the selected item
                if self.selected_tree_index != -1:
                    self.grid_items[self.selected_tree_index].setSelected(True)
                self.update_depth()

    def zoom_changed(self):
        """When we update the "Zoom" slider."""
        for item in self.grid_items:
            item.set_max_size(self._calculate_zoom(self.zoom))

        width = (self.view.width() - self.view.verticalScrollBar().width())
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

    @contextmanager
    def _prevent_commit(self):
        try:
            self.__prevent_commit = True
            yield
        finally:
            self.__prevent_commit = False

    def _update_info_box(self):
        self.ui_info.setText('Trees: {}'.format(
            len(self.forest_adapter.get_trees())))

    def _update_depth_slider(self):
        self.depth_limit = self._get_max_depth()

        self.ui_depth_slider.parent().setEnabled(True)
        self.ui_depth_slider.setMaximum(self.depth_limit)
        self.ui_depth_slider.setValue(self.depth_limit)

    def _clear_info_box(self):
        self.ui_info.setText('No forest on input.')

    def _clear_target_class_combo(self):
        self.ui_target_class_combo.clear()
        self.target_class_index = 0
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _clear_depth_slider(self):
        self.ui_depth_slider.parent().setEnabled(False)
        self.ui_depth_slider.setMaximum(0)

    def _get_max_depth(self):
        return max(tree.tree_adapter.max_depth for tree in self.ptrees)

    def _get_forest_adapter(self, model):
        return SklRandomForestAdapter(model)

    @contextmanager
    def disable_ui(self):
        """Temporarly disable the UI while trees may be redrawn."""
        try:
            self.ui_size_calc_combo.setEnabled(False)
            self.ui_depth_slider.setEnabled(False)
            self.ui_target_class_combo.setEnabled(False)
            self.ui_zoom_slider.setEnabled(False)
            yield
        finally:
            self.ui_size_calc_combo.setEnabled(True)
            self.ui_depth_slider.setEnabled(True)
            self.ui_target_class_combo.setEnabled(True)
            self.ui_zoom_slider.setEnabled(True)

    def _draw_trees(self):
        self.grid_items, self.ptrees = [], []

        num_trees = len(self.forest_adapter.get_trees())
        with self.progressBar(num_trees) as prg, self.disable_ui():
            for tree in self.forest_adapter.get_trees():
                ptree = PythagorasTreeViewer(
                    None,
                    tree,
                    interactive=False,
                    padding=100,
                    target_class_index=self.target_class_index,
                    weight_adjustment=self.SIZE_CALCULATION[
                        self.size_calc_idx][1])
                grid_item = GridItem(ptree,
                                     self.grid,
                                     max_size=self._calculate_zoom(self.zoom))
                # We don't want to show flickering while the trees are being
                grid_item.setVisible(False)

                self.grid_items.append(grid_item)
                self.ptrees.append(ptree)
                prg.advance()

            self.grid.set_items(self.grid_items)
            # This is necessary when adding items for the first time
            if self.grid:
                width = (self.view.width() -
                         self.view.verticalScrollBar().width())
                self.grid.reflow(width)
                self.grid.setPreferredWidth(width)
                # After drawing is complete, we show the trees
                for grid_item in self.grid_items:
                    grid_item.setVisible(True)

    @staticmethod
    def _calculate_zoom(zoom_level):
        """Calculate the max size for grid items from zoom level setting."""
        return zoom_level * 5

    def onDeleteWidget(self):
        """When deleting the widget."""
        super().onDeleteWidget()
        self.clear()

    def commit(self):
        """Commit the selected tree to output."""
        if self.__prevent_commit:
            return

        if not self.scene.selectedItems():
            self.send('Tree', None)
            # The selected tree index should only reset when model changes
            if self.model is None:
                self.selected_tree_index = -1
            return

        selected_item = self.scene.selectedItems()[0]
        self.selected_tree_index = self.grid_items.index(selected_item)
        tree = self.model.trees[self.selected_tree_index]
        tree.instances = self.instances
        tree.meta_target_class_index = self.target_class_index
        tree.meta_size_calc_idx = self.size_calc_idx
        tree.meta_depth_limit = self.depth_limit

        self.send('Tree', tree)

    def send_report(self):
        """Send report."""
        self.report_plot()

    def _update_scene_rect(self):
        self.scene.setSceneRect(self.scene.itemsBoundingRect())

    def _update_target_class_combo(self):
        self._clear_target_class_combo()
        label = [
            x for x in self.ui_target_class_combo.parent().children()
            if isinstance(x, QLabel)
        ][0]

        if self.instances.domain.has_discrete_class:
            label_text = 'Target class'
            values = [
                c.title() for c in self.instances.domain.class_vars[0].values
            ]
            values.insert(0, 'None')
        else:
            label_text = 'Node color'
            values = list(ContinuousTreeNode.COLOR_METHODS.keys())
        label.setText(label_text)
        self.ui_target_class_combo.addItems(values)
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def resizeEvent(self, ev):
        width = (self.view.width() - self.view.verticalScrollBar().width())
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

        super().resizeEvent(ev)
Exemplo n.º 2
0
class OWPythagoreanForest(OWWidget):
    name = 'Pythagorean Forest'
    description = 'Pythagorean forest for visualising random forests.'
    icon = 'icons/PythagoreanForest.svg'

    priority = 1001

    inputs = [('Random forest', RandomForestModel, 'set_rf')]
    outputs = [('Tree', TreeModel)]

    # Enable the save as feature
    graph_name = 'scene'

    # Settings
    depth_limit = settings.ContextSetting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    size_log_scale = settings.Setting(2)
    zoom = settings.Setting(50)
    selected_tree_index = settings.ContextSetting(-1)

    CLASSIFICATION, REGRESSION = range(2)

    def __init__(self):
        super().__init__()
        # Instance variables
        self.forest_type = self.CLASSIFICATION
        self.model = None
        self.forest_adapter = None
        self.dataset = None
        self.clf_dataset = None
        # We need to store refernces to the trees and grid items
        self.grid_items, self.ptrees = [], []

        self.color_palette = None

        # Different methods to calculate the size of squares
        self.SIZE_CALCULATION = [
            ('Normal', lambda x: x),
            ('Square root', lambda x: sqrt(x)),
            ('Logarithmic', lambda x: log(x * self.size_log_scale)),
        ]

        self.REGRESSION_COLOR_CALC = [
            ('None', lambda _, __: QColor(255, 255, 255)),
            ('Class mean', self._color_class_mean),
            ('Standard deviation', self._color_stddev),
        ]

        # CONTROL AREA
        # Tree info area
        box_info = gui.widgetBox(self.controlArea, 'Forest')
        self.ui_info = gui.widgetLabel(box_info, label='')

        # Display controls area
        box_display = gui.widgetBox(self.controlArea, 'Display')
        self.ui_depth_slider = gui.hSlider(
            box_display, self, 'depth_limit', label='Depth', ticks=False,
            callback=self.max_depth_changed)
        self.ui_target_class_combo = gui.comboBox(
            box_display, self, 'target_class_index', label='Target class',
            orientation=Qt.Horizontal, items=[], contentsLength=8,
            callback=self.target_colors_changed)
        self.ui_size_calc_combo = gui.comboBox(
            box_display, self, 'size_calc_idx', label='Size',
            orientation=Qt.Horizontal,
            items=list(zip(*self.SIZE_CALCULATION))[0], contentsLength=8,
            callback=self.size_calc_changed)
        self.ui_zoom_slider = gui.hSlider(
            box_display, self, 'zoom', label='Zoom', ticks=False, minValue=20,
            maxValue=150, callback=self.zoom_changed, createLabel=False)

        # Stretch to fit the rest of the unsused area
        gui.rubber(self.controlArea)

        self.controlArea.setSizePolicy(
            QSizePolicy.Preferred, QSizePolicy.Expanding)

        # MAIN AREA
        self.scene = QGraphicsScene(self)
        self.scene.selectionChanged.connect(self.commit)
        self.grid = OWGrid()
        self.grid.geometryChanged.connect(self._update_scene_rect)
        self.scene.addItem(self.grid)

        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOn)
        self.mainArea.layout().addWidget(self.view)

        self.resize(800, 500)

        self.clear()

    def set_rf(self, model=None):
        """When a different forest is given."""
        self.clear()
        self.model = model

        if model is not None:
            if isinstance(model, RandomForestClassifier):
                self.forest_type = self.CLASSIFICATION
            elif isinstance(model, RandomForestRegressor):
                self.forest_type = self.REGRESSION
            else:
                raise RuntimeError('Invalid type of forest.')

            self.forest_adapter = self._get_forest_adapter(self.model)
            self.color_palette = self._type_specific('_get_color_palette')()
            self._draw_trees()

            self.dataset = model.instances
            # this bit is important for the regression classifier
            if self.dataset is not None and \
                    self.dataset.domain != model.domain:
                self.clf_dataset = Table.from_table(
                    self.model.domain, self.dataset)
            else:
                self.clf_dataset = self.dataset

            self._update_info_box()
            self._type_specific('_update_target_class_combo')()
            self._update_depth_slider()

            self.selected_tree_index = -1

    def clear(self):
        """Clear all relevant data from the widget."""
        self.model = None
        self.forest_adapter = None
        self.ptrees = []
        self.grid_items = []
        self.grid.clear()

        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()

    # CONTROL AREA CALLBACKS
    def max_depth_changed(self):
        """When the max depth slider is changed."""
        for tree in self.ptrees:
            tree.set_depth_limit(self.depth_limit)

    def target_colors_changed(self):
        """When the target class or coloring method is changed."""
        for tree in self.ptrees:
            tree.target_class_has_changed()

    def size_calc_changed(self):
        """When the size calculation of the trees is changed."""
        if self.model is not None:
            self.forest_adapter = self._get_forest_adapter(self.model)
            self.grid.clear()
            self._draw_trees()
            # Keep the selected item
            if self.selected_tree_index != -1:
                self.grid_items[self.selected_tree_index].setSelected(True)
            self.max_depth_changed()

    def zoom_changed(self):
        """When we update the "Zoom" slider."""
        for item in self.grid_items:
            item.set_max_size(self._calculate_zoom(self.zoom))

        width = (self.view.width() - self.view.verticalScrollBar().width())
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

    # MODEL CHANGED METHODS
    def _update_info_box(self):
        self.ui_info.setText(
            'Trees: {}'.format(len(self.forest_adapter.get_trees()))
        )

    def _update_depth_slider(self):
        self.depth_limit = self._get_max_depth()

        self.ui_depth_slider.parent().setEnabled(True)
        self.ui_depth_slider.setMaximum(self.depth_limit)
        self.ui_depth_slider.setValue(self.depth_limit)

    # MODEL CLEARED METHODS
    def _clear_info_box(self):
        self.ui_info.setText('No forest on input.')

    def _clear_target_class_combo(self):
        self.ui_target_class_combo.clear()
        self.target_class_index = 0
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _clear_depth_slider(self):
        self.ui_depth_slider.parent().setEnabled(False)
        self.ui_depth_slider.setMaximum(0)

    # HELPFUL METHODS
    def _get_max_depth(self):
        return max([tree.tree_adapter.max_depth for tree in self.ptrees])

    def _get_forest_adapter(self, model):
        return SklRandomForestAdapter(model)

    def _draw_trees(self):
        self.ui_size_calc_combo.setEnabled(False)
        self.grid_items, self.ptrees = [], []

        with self.progressBar(len(self.forest_adapter.get_trees())) as prg:
            for tree in self.forest_adapter.get_trees():
                ptree = PythagorasTreeViewer(
                    None, tree,
                    node_color_func=self._type_specific('_get_node_color'),
                    interactive=False, padding=100)
                self.grid_items.append(GridItem(
                    ptree, self.grid, max_size=self._calculate_zoom(self.zoom)
                ))
                self.ptrees.append(ptree)
                prg.advance()
        self.grid.set_items(self.grid_items)
        # This is necessary when adding items for the first time
        if self.grid:
            width = (self.view.width() -
                     self.view.verticalScrollBar().width())
            self.grid.reflow(width)
            self.grid.setPreferredWidth(width)
        self.ui_size_calc_combo.setEnabled(True)

    @staticmethod
    def _calculate_zoom(zoom_level):
        """Calculate the max size for grid items from zoom level setting."""
        return zoom_level * 5

    def onDeleteWidget(self):
        """When deleting the widget."""
        super().onDeleteWidget()
        self.clear()

    def commit(self):
        """Commit the selected tree to output."""
        if len(self.scene.selectedItems()) == 0:
            self.send('Tree', None)
            # The selected tree index should only reset when model changes
            if self.model is None:
                self.selected_tree_index = -1
            return

        selected_item = self.scene.selectedItems()[0]
        self.selected_tree_index = self.grid_items.index(selected_item)
        obj = self.model.trees[self.selected_tree_index]
        obj.instances = self.dataset
        obj.meta_target_class_index = self.target_class_index
        obj.meta_size_calc_idx = self.size_calc_idx
        obj.meta_size_log_scale = self.size_log_scale
        obj.meta_depth_limit = self.depth_limit

        self.send('Tree', obj)

    def send_report(self):
        """Send report."""
        self.report_plot()

    def _update_scene_rect(self):
        self.scene.setSceneRect(self.scene.itemsBoundingRect())

    def resizeEvent(self, ev):
        width = (self.view.width() - self.view.verticalScrollBar().width())
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

        super().resizeEvent(ev)

    def _type_specific(self, method):
        """A best effort method getter that somewhat separates logic specific
        to classification and regression trees.
        This relies on conventional naming of specific methods, e.g.
        a method name _get_tooltip would need to be defined like so:
        _classification_get_tooltip and _regression_get_tooltip, since they are
        both specific.

        Parameters
        ----------
        method : str
            Method name that we would like to call.

        Returns
        -------
        callable or None

        """
        if self.forest_type == self.CLASSIFICATION:
            return getattr(self, '_classification' + method)
        elif self.forest_type == self.REGRESSION:
            return getattr(self, '_regression' + method)
        else:
            return None

    # CLASSIFICATION FOREST SPECIFIC METHODS
    def _classification_update_target_class_combo(self):
        self._clear_target_class_combo()
        self.ui_target_class_combo.addItem('None')
        values = [c.title() for c in
                  self.model.domain.class_vars[0].values]
        self.ui_target_class_combo.addItems(values)

    def _classification_get_color_palette(self):
        return [QColor(*c) for c in self.model.domain.class_var.colors]

    def _classification_get_node_color(self, adapter, tree_node):
        # this is taken almost directly from the existing classification tree
        # viewer
        colors = self.color_palette
        distribution = adapter.get_distribution(tree_node.label)[0]
        total = np.sum(distribution)

        if self.target_class_index:
            p = distribution[self.target_class_index - 1] / total
            color = colors[self.target_class_index - 1].lighter(200 - 100 * p)
        else:
            modus = np.argmax(distribution)
            p = distribution[modus] / (total or 1)
            color = colors[int(modus)].lighter(400 - 300 * p)
        return color

    # REGRESSION FOREST SPECIFIC METHODS
    def _regression_update_target_class_combo(self):
        self._clear_target_class_combo()
        self.ui_target_class_combo.addItems(
            list(zip(*self.REGRESSION_COLOR_CALC))[0])
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _regression_get_color_palette(self):
        return ContinuousPaletteGenerator(
            *self.forest_adapter.domain.class_var.colors)

    def _regression_get_node_color(self, adapter, tree_node):
        return self.REGRESSION_COLOR_CALC[self.target_class_index][1](
            adapter, tree_node
        )

    def _color_class_mean(self, adapter, tree_node):
        # calculate node colors relative to the mean of the node samples
        min_mean = np.min(self.clf_dataset.Y)
        max_mean = np.max(self.clf_dataset.Y)
        instances = adapter.get_instances_in_nodes(self.clf_dataset,
                                                   tree_node.label)
        mean = np.mean(instances.Y)

        return self.color_palette[(mean - min_mean) / (max_mean - min_mean)]

    def _color_stddev(self, adapter, tree_node):
        # calculate node colors relative to the standard deviation in the node
        # samples
        min_mean, max_mean = 0, np.std(self.clf_dataset.Y)
        instances = adapter.get_instances_in_nodes(self.clf_dataset,
                                                   tree_node.label)
        std = np.std(instances.Y)

        return self.color_palette[(std - min_mean) / (max_mean - min_mean)]
Exemplo n.º 3
0
class OWPythagoreanForest(OWWidget):
    name = 'Pythagorean Forest'
    description = 'Pythagorean forest for visualising random forests.'
    icon = 'icons/PythagoreanForest.svg'

    priority = 1001

    inputs = [('Random forest', RandomForestModel, 'set_rf')]
    outputs = [('Tree', TreeModel)]

    # Enable the save as feature
    graph_name = 'scene'

    # Settings
    depth_limit = settings.ContextSetting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    size_log_scale = settings.Setting(2)
    zoom = settings.Setting(50)
    selected_tree_index = settings.ContextSetting(-1)

    CLASSIFICATION, REGRESSION = range(2)

    def __init__(self):
        super().__init__()
        # Instance variables
        self.forest_type = self.CLASSIFICATION
        self.model = None
        self.forest_adapter = None
        self.dataset = None
        self.clf_dataset = None
        # We need to store refernces to the trees and grid items
        self.grid_items, self.ptrees = [], []

        self.color_palette = None

        # Different methods to calculate the size of squares
        self.SIZE_CALCULATION = [
            ('Normal', lambda x: x),
            ('Square root', lambda x: sqrt(x)),
            ('Logarithmic', lambda x: log(x * self.size_log_scale)),
        ]

        self.REGRESSION_COLOR_CALC = [
            ('None', lambda _, __: QColor(255, 255, 255)),
            ('Class mean', self._color_class_mean),
            ('Standard deviation', self._color_stddev),
        ]

        # CONTROL AREA
        # Tree info area
        box_info = gui.widgetBox(self.controlArea, 'Forest')
        self.ui_info = gui.widgetLabel(box_info, label='')

        # Display controls area
        box_display = gui.widgetBox(self.controlArea, 'Display')
        self.ui_depth_slider = gui.hSlider(box_display,
                                           self,
                                           'depth_limit',
                                           label='Depth',
                                           ticks=False,
                                           callback=self.max_depth_changed)
        self.ui_target_class_combo = gui.comboBox(
            box_display,
            self,
            'target_class_index',
            label='Target class',
            orientation=Qt.Horizontal,
            items=[],
            contentsLength=8,
            callback=self.target_colors_changed)
        self.ui_size_calc_combo = gui.comboBox(
            box_display,
            self,
            'size_calc_idx',
            label='Size',
            orientation=Qt.Horizontal,
            items=list(zip(*self.SIZE_CALCULATION))[0],
            contentsLength=8,
            callback=self.size_calc_changed)
        self.ui_zoom_slider = gui.hSlider(box_display,
                                          self,
                                          'zoom',
                                          label='Zoom',
                                          ticks=False,
                                          minValue=20,
                                          maxValue=150,
                                          callback=self.zoom_changed,
                                          createLabel=False)

        # Stretch to fit the rest of the unsused area
        gui.rubber(self.controlArea)

        self.controlArea.setSizePolicy(QSizePolicy.Preferred,
                                       QSizePolicy.Expanding)

        # MAIN AREA
        self.scene = QGraphicsScene(self)
        self.scene.selectionChanged.connect(self.commit)
        self.grid = OWGrid()
        self.grid.geometryChanged.connect(self._update_scene_rect)
        self.scene.addItem(self.grid)

        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOn)
        self.mainArea.layout().addWidget(self.view)

        self.resize(800, 500)

        self.clear()

    def set_rf(self, model=None):
        """When a different forest is given."""
        self.clear()
        self.model = model

        if model is not None:
            if isinstance(model, RandomForestClassifier):
                self.forest_type = self.CLASSIFICATION
            elif isinstance(model, RandomForestRegressor):
                self.forest_type = self.REGRESSION
            else:
                raise RuntimeError('Invalid type of forest.')

            self.forest_adapter = self._get_forest_adapter(self.model)
            self.color_palette = self._type_specific('_get_color_palette')()
            self._draw_trees()

            self.dataset = model.instances
            # this bit is important for the regression classifier
            if self.dataset is not None and \
                    self.dataset.domain != model.domain:
                self.clf_dataset = Table.from_table(self.model.domain,
                                                    self.dataset)
            else:
                self.clf_dataset = self.dataset

            self._update_info_box()
            self._type_specific('_update_target_class_combo')()
            self._update_depth_slider()

            self.selected_tree_index = -1

    def clear(self):
        """Clear all relevant data from the widget."""
        self.model = None
        self.forest_adapter = None
        self.ptrees = []
        self.grid_items = []
        self.grid.clear()

        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()

    # CONTROL AREA CALLBACKS
    def max_depth_changed(self):
        """When the max depth slider is changed."""
        for tree in self.ptrees:
            tree.set_depth_limit(self.depth_limit)

    def target_colors_changed(self):
        """When the target class or coloring method is changed."""
        for tree in self.ptrees:
            tree.target_class_has_changed()

    def size_calc_changed(self):
        """When the size calculation of the trees is changed."""
        if self.model is not None:
            self.forest_adapter = self._get_forest_adapter(self.model)
            self.grid.clear()
            self._draw_trees()
            # Keep the selected item
            if self.selected_tree_index != -1:
                self.grid_items[self.selected_tree_index].setSelected(True)
            self.max_depth_changed()

    def zoom_changed(self):
        """When we update the "Zoom" slider."""
        for item in self.grid_items:
            item.set_max_size(self._calculate_zoom(self.zoom))

        width = (self.view.width() - self.view.verticalScrollBar().width())
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

    # MODEL CHANGED METHODS
    def _update_info_box(self):
        self.ui_info.setText('Trees: {}'.format(
            len(self.forest_adapter.get_trees())))

    def _update_depth_slider(self):
        self.depth_limit = self._get_max_depth()

        self.ui_depth_slider.parent().setEnabled(True)
        self.ui_depth_slider.setMaximum(self.depth_limit)
        self.ui_depth_slider.setValue(self.depth_limit)

    # MODEL CLEARED METHODS
    def _clear_info_box(self):
        self.ui_info.setText('No forest on input.')

    def _clear_target_class_combo(self):
        self.ui_target_class_combo.clear()
        self.target_class_index = 0
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _clear_depth_slider(self):
        self.ui_depth_slider.parent().setEnabled(False)
        self.ui_depth_slider.setMaximum(0)

    # HELPFUL METHODS
    def _get_max_depth(self):
        return max([tree.tree_adapter.max_depth for tree in self.ptrees])

    def _get_forest_adapter(self, model):
        return SklRandomForestAdapter(model)

    def _draw_trees(self):
        self.ui_size_calc_combo.setEnabled(False)
        self.grid_items, self.ptrees = [], []

        with self.progressBar(len(self.forest_adapter.get_trees())) as prg:
            for tree in self.forest_adapter.get_trees():
                ptree = PythagorasTreeViewer(
                    None,
                    tree,
                    node_color_func=self._type_specific('_get_node_color'),
                    interactive=False,
                    padding=100)
                self.grid_items.append(
                    GridItem(ptree,
                             self.grid,
                             max_size=self._calculate_zoom(self.zoom)))
                self.ptrees.append(ptree)
                prg.advance()
        self.grid.set_items(self.grid_items)
        # This is necessary when adding items for the first time
        if self.grid:
            width = (self.view.width() - self.view.verticalScrollBar().width())
            self.grid.reflow(width)
            self.grid.setPreferredWidth(width)
        self.ui_size_calc_combo.setEnabled(True)

    @staticmethod
    def _calculate_zoom(zoom_level):
        """Calculate the max size for grid items from zoom level setting."""
        return zoom_level * 5

    def onDeleteWidget(self):
        """When deleting the widget."""
        super().onDeleteWidget()
        self.clear()

    def commit(self):
        """Commit the selected tree to output."""
        if len(self.scene.selectedItems()) == 0:
            self.send('Tree', None)
            # The selected tree index should only reset when model changes
            if self.model is None:
                self.selected_tree_index = -1
            return

        selected_item = self.scene.selectedItems()[0]
        self.selected_tree_index = self.grid_items.index(selected_item)
        obj = self.model.trees[self.selected_tree_index]
        obj.instances = self.dataset
        obj.meta_target_class_index = self.target_class_index
        obj.meta_size_calc_idx = self.size_calc_idx
        obj.meta_size_log_scale = self.size_log_scale
        obj.meta_depth_limit = self.depth_limit

        self.send('Tree', obj)

    def send_report(self):
        """Send report."""
        self.report_plot()

    def _update_scene_rect(self):
        self.scene.setSceneRect(self.scene.itemsBoundingRect())

    def resizeEvent(self, ev):
        width = (self.view.width() - self.view.verticalScrollBar().width())
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

        super().resizeEvent(ev)

    def _type_specific(self, method):
        """A best effort method getter that somewhat separates logic specific
        to classification and regression trees.
        This relies on conventional naming of specific methods, e.g.
        a method name _get_tooltip would need to be defined like so:
        _classification_get_tooltip and _regression_get_tooltip, since they are
        both specific.

        Parameters
        ----------
        method : str
            Method name that we would like to call.

        Returns
        -------
        callable or None

        """
        if self.forest_type == self.CLASSIFICATION:
            return getattr(self, '_classification' + method)
        elif self.forest_type == self.REGRESSION:
            return getattr(self, '_regression' + method)
        else:
            return None

    # CLASSIFICATION FOREST SPECIFIC METHODS
    def _classification_update_target_class_combo(self):
        self._clear_target_class_combo()
        self.ui_target_class_combo.addItem('None')
        values = [c.title() for c in self.model.domain.class_vars[0].values]
        self.ui_target_class_combo.addItems(values)

    def _classification_get_color_palette(self):
        return [QColor(*c) for c in self.model.domain.class_var.colors]

    def _classification_get_node_color(self, adapter, tree_node):
        # this is taken almost directly from the existing classification tree
        # viewer
        colors = self.color_palette
        distribution = adapter.get_distribution(tree_node.label)[0]
        total = np.sum(distribution)

        if self.target_class_index:
            p = distribution[self.target_class_index - 1] / total
            color = colors[self.target_class_index - 1].lighter(200 - 100 * p)
        else:
            modus = np.argmax(distribution)
            p = distribution[modus] / (total or 1)
            color = colors[int(modus)].lighter(400 - 300 * p)
        return color

    # REGRESSION FOREST SPECIFIC METHODS
    def _regression_update_target_class_combo(self):
        self._clear_target_class_combo()
        self.ui_target_class_combo.addItems(
            list(zip(*self.REGRESSION_COLOR_CALC))[0])
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _regression_get_color_palette(self):
        return ContinuousPaletteGenerator(
            *self.forest_adapter.domain.class_var.colors)

    def _regression_get_node_color(self, adapter, tree_node):
        return self.REGRESSION_COLOR_CALC[self.target_class_index][1](
            adapter, tree_node)

    def _color_class_mean(self, adapter, tree_node):
        # calculate node colors relative to the mean of the node samples
        min_mean = np.min(self.clf_dataset.Y)
        max_mean = np.max(self.clf_dataset.Y)
        instances = adapter.get_instances_in_nodes(self.clf_dataset,
                                                   tree_node.label)
        mean = np.mean(instances.Y)

        return self.color_palette[(mean - min_mean) / (max_mean - min_mean)]

    def _color_stddev(self, adapter, tree_node):
        # calculate node colors relative to the standard deviation in the node
        # samples
        min_mean, max_mean = 0, np.std(self.clf_dataset.Y)
        instances = adapter.get_instances_in_nodes(self.clf_dataset,
                                                   tree_node.label)
        std = np.std(instances.Y)

        return self.color_palette[(std - min_mean) / (max_mean - min_mean)]
Exemplo n.º 4
0
class OWVennDiagram(widget.OWWidget):
    name = "Venn Diagram"
    description = "A graphical visualization of the overlap of data instances " \
                  "from a collection of input datasets."
    icon = "icons/VennDiagram.svg"
    priority = 280
    keywords = []
    settings_version = 2

    class Inputs:
        data = Input("Data", Table, multiple=True)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    class Error(widget.OWWidget.Error):
        instances_mismatch = Msg("Data sets do not contain the same instances.")
        too_many_inputs = Msg("Venn diagram accepts at most five datasets.")

    class Warning(widget.OWWidget.Warning):
        renamed_vars = Msg("Some variables have been renamed "
                           "to avoid duplicates.\n{}")

    selection: list

    settingsHandler = settings.DomainContextHandler()
    # Indices of selected disjoint areas
    selection = settings.Setting([], schema_only=True)
    #: Output unique items (one output row for every unique instance `key`)
    #: or preserve all duplicates in the output.
    output_duplicates = settings.Setting(False)
    autocommit = settings.Setting(True)
    rowwise = settings.Setting(True)
    selected_feature = settings.ContextSetting(None)

    want_control_area = False
    graph_name = "scene"
    atr_types = ['attributes', 'metas', 'class_vars']
    atr_vals = {'metas': 'metas', 'attributes': 'X', 'class_vars': 'Y'}
    row_vals = {'attributes': 'x', 'class_vars': 'y', 'metas': 'metas'}

    def __init__(self):
        super().__init__()

        # Diagram update is in progress
        self._updating = False
        # Input update is in progress
        self._inputUpdate = False
        # Input datasets in the order they were 'connected'.
        self.data = {}
        # Extracted input item sets in the order they were 'connected'
        self.itemsets = {}
        # A list with 2 ** len(self.data) elements that store item sets
        # belonging to each area
        self.disjoint = []
        # A list with  2 ** len(self.data) elements that store keys of tables
        # intersected in each area
        self.area_keys = []

        # Main area view
        self.scene = QGraphicsScene(self)
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing)
        self.view.setBackgroundRole(QPalette.Window)
        self.view.setFrameStyle(QGraphicsView.StyledPanel)

        self.mainArea.layout().addWidget(self.view)
        self.vennwidget = VennDiagram()
        self._resize()
        self.vennwidget.itemTextEdited.connect(self._on_itemTextEdited)
        self.scene.selectionChanged.connect(self._on_selectionChanged)

        self.scene.addItem(self.vennwidget)

        controls = gui.hBox(self.mainArea)
        box = gui.radioButtonsInBox(
            controls, self, 'rowwise',
            ["Columns (features)", "Rows (instances), matched by", ],
            box="Elements", callback=self._on_matching_changed
        )
        gui.comboBox(
            gui.indentedBox(box), self, "selected_feature",
            model=itemmodels.VariableListModel(placeholder="Instance identity"),
            callback=self._on_inputAttrActivated
            )
        box.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)

        self.outputs_box = box = gui.vBox(controls, "Output")
        self.output_duplicates_cb = gui.checkBox(
            box, self, "output_duplicates", "Output duplicates",
            callback=lambda: self.commit())  # pylint: disable=unnecessary-lambda
        gui.auto_send(box, self, "autocommit", box=False)
        self.output_duplicates_cb.setEnabled(bool(self.rowwise))
        self._queue = []

    def resizeEvent(self, event):
        super().resizeEvent(event)
        self._resize()

    def showEvent(self, event):
        super().showEvent(event)
        self._resize()

    def _resize(self):
        # vennwidget draws so that the diagram fits into its geometry,
        # while labels take further 120 pixels, hence -120 in below formula
        size = max(200, min(self.view.width(), self.view.height()) - 120)
        self.vennwidget.resize(size, size)
        self.scene.setSceneRect(self.scene.itemsBoundingRect())

    @Inputs.data
    @check_sql_input
    def setData(self, data, key=None):
        self.Error.too_many_inputs.clear()
        if not self._inputUpdate:
            self._inputUpdate = True
        if key in self.data:
            if data is None:
                # Remove the input
                # Clear possible warnings.
                self.Warning.clear()
                del self.data[key]
            else:
                # Update existing item
                self.data[key] = self.data[key]._replace(name=data.name, table=data)

        elif data is not None:
            # TODO: Allow setting more them 5 inputs and let the user
            # select the 5 to display.
            if len(self.data) == 5:
                self.Error.too_many_inputs()
                return
            # Add a new input
            self.data[key] = _InputData(key, data.name, data)
        self._setInterAttributes()

    def data_equality(self):
        """ Checks if all input datasets have same ids. """
        if not self.data.values():
            return True
        sets = []
        for val in self.data.values():
            sets.append(set(val.table.ids))
        inter = reduce(set.intersection, sets)
        return len(inter) == max(map(len, sets))

    def settings_compatible(self):
        self.Error.instances_mismatch.clear()
        if not self.rowwise:
            if not self.data_equality():
                self.vennwidget.clear()
                self.Error.instances_mismatch()
                self.itemsets = {}
                return False
        return True

    def handleNewSignals(self):
        self._inputUpdate = False
        self.vennwidget.clear()
        if not self.settings_compatible():
            self.invalidateOutput()
            return

        self._createItemsets()
        self._createDiagram()
        # If autocommit is enabled, _createDiagram already outputs data
        # If not, call unconditional_commit from here
        if not self.autocommit:
            self.unconditional_commit()

        self._updateInfo()
        super().handleNewSignals()

    def intersectionStringAttrs(self):
        sets = [set(string_attributes(data_.table.domain)) for data_ in self.data.values()]
        if sets:
            return reduce(set.intersection, sets)
        return set()

    def _setInterAttributes(self):
        model = self.controls.selected_feature.model()
        model[:] = [None] + list(self.intersectionStringAttrs())
        if self.selected_feature:
            names = (var.name for var in model if var)
            if self.selected_feature.name not in names:
                self.selected_feature = model[0]

    def _itemsForInput(self, key):
        """
        Calculates input for venn diagram, according to user's settings.
        """
        table = self.data[key].table
        attr = self.selected_feature
        if attr:
            return [str(inst[attr]) for inst in table
                    if not np.isnan(inst[attr])]
        else:
            return list(table.ids)

    def _createItemsets(self):
        """
        Create itemsets over rows or columns (domains) of input tables.
        """
        olditemsets = dict(self.itemsets)
        self.itemsets.clear()

        for key, input_ in self.data.items():
            if self.rowwise:
                items = self._itemsForInput(key)
            else:
                items = [el.name for el in input_.table.domain.attributes]
            name = input_.name
            if key in olditemsets and olditemsets[key].name == name:
                # Reuse the title (which might have been changed by the user)
                title = olditemsets[key].title
            else:
                title = name

            itemset = _ItemSet(key=key, name=name, title=title, items=items)
            self.itemsets[key] = itemset

    def _createDiagram(self):
        self._updating = True

        oldselection = list(self.selection)

        n = len(self.itemsets)
        self.disjoint, self.area_keys = \
            self.get_disjoint(set(s.items) for s in self.itemsets.values())

        vennitems = []
        colors = colorpalettes.LimitedDiscretePalette(n, force_hsv=True)

        for i, item in enumerate(self.itemsets.values()):
            cnt = len(set(item.items))
            cnt_all = len(item.items)
            if cnt != cnt_all:
                fmt = '{} <i>(all: {})</i>'
            else:
                fmt = '{}'
            counts = fmt.format(cnt, cnt_all)
            gr = VennSetItem(text=item.title, informativeText=counts)
            color = colors[i]
            color.setAlpha(100)
            gr.setBrush(QBrush(color))
            gr.setPen(QPen(Qt.NoPen))
            vennitems.append(gr)

        self.vennwidget.setItems(vennitems)

        for i, area in enumerate(self.vennwidget.vennareas()):
            area_items = list(map(str, list(self.disjoint[i])))
            if i:
                area.setText("{0}".format(len(area_items)))

            label = disjoint_set_label(i, n, simplify=False)
            head = "<h4>|{}| = {}</h4>".format(label, len(area_items))
            if len(area_items) > 32:
                items_str = ", ".join(map(escape, area_items[:32]))
                hidden = len(area_items) - 32
                tooltip = ("{}<span>{}, ...</br>({} items not shown)<span>"
                           .format(head, items_str, hidden))
            elif area_items:
                tooltip = "{}<span>{}</span>".format(
                    head,
                    ", ".join(map(escape, area_items))
                )
            else:
                tooltip = head

            area.setToolTip(tooltip)

            area.setPen(QPen(QColor(10, 10, 10, 200), 1.5))
            area.setFlag(QGraphicsPathItem.ItemIsSelectable, True)
            area.setSelected(i in oldselection)

        self._updating = False
        self._on_selectionChanged()

    def _updateInfo(self):
        # Clear all warnings
        self.warning()

        if self.selected_feature is None:
            no_idx = ["#{}".format(i + 1)
                      for i, key in enumerate(self.data)
                      if not source_attributes(self.data[key].table.domain)]
            if len(no_idx) == 1:
                self.warning("Dataset {} has no suitable identifiers."
                             .format(no_idx[0]))
            elif len(no_idx) > 1:
                self.warning("Datasets {} and {} have no suitable identifiers."
                             .format(", ".join(no_idx[:-1]), no_idx[-1]))

    def _on_selectionChanged(self):
        if self._updating:
            return

        areas = self.vennwidget.vennareas()
        self.selection = [i for i, area in enumerate(areas) if area.isSelected()]
        self.invalidateOutput()

    def _on_matching_changed(self):
        self.output_duplicates_cb.setEnabled(bool(self.rowwise))
        if not self.settings_compatible():
            self.invalidateOutput()
            return
        self._createItemsets()
        self._createDiagram()
        self._updateInfo()

    def _on_inputAttrActivated(self):
        self.rowwise = 1
        self._on_matching_changed()

    def _on_itemTextEdited(self, index, text):
        text = str(text)
        key = list(self.itemsets)[index]
        self.itemsets[key] = self.itemsets[key]._replace(title=text)

    def invalidateOutput(self):
        self.commit()

    def merge_data(self, domain, values, ids=None):
        X, metas, class_vars = None, None, None
        renamed = []
        for val in domain.values():
            names = [var.name for var in val]
            unique_names = get_unique_names_duplicates(names)
            for n, u, idx, var in zip(names, unique_names, count(), val):
                if n != u:
                    val[idx] = var.copy(name=u)
                    renamed.append(n)
        if renamed:
            self.Warning.renamed_vars(', '.join(renamed))
        if 'attributes' in values:
            X = np.hstack(values['attributes'])
        if 'metas' in values:
            metas = np.hstack(values['metas'])
            n = len(metas)
        if 'class_vars' in values:
            class_vars = np.hstack(values['class_vars'])
            n = len(class_vars)
        if X is None:
            X = np.empty((n, 0))
        table = Table.from_numpy(Domain(**domain), X, class_vars, metas)
        if ids is not None:
            table.ids = ids
        return table

    def extract_columnwise(self, var_dict, columns=None):
        domain = {type_ : [] for type_ in self.atr_types}
        values = defaultdict(list)
        renamed = []
        for atr_type, vars_dict in var_dict.items():
            for var_name, var_data in vars_dict.items():
                is_selected = bool(columns) and var_name.name in columns
                if var_data[0]:
                    #columns are different, copy all, rename them
                    for var, table_key in var_data[1]:
                        idx = list(self.data).index(table_key) + 1
                        new_atr = var.copy(name=f'{var_name.name} ({idx})')
                        if columns and atr_type == 'attributes':
                            new_atr.attributes['Selected'] = is_selected
                        domain[atr_type].append(new_atr)
                        renamed.append(var_name.name)
                        values[atr_type].append(getattr(self.data[table_key].table[:, var_name],
                                                        self.atr_vals[atr_type])
                                                .reshape(-1, 1))
                else:
                    new_atr = var_data[1][0][0].copy()
                    if columns and atr_type == 'attributes':
                        new_atr.attributes['Selected'] = is_selected
                    domain[atr_type].append(new_atr)
                    values[atr_type].append(getattr(self.data[var_data[1][0][1]].table[:, var_name],
                                                    self.atr_vals[atr_type])
                                            .reshape(-1, 1))
        if renamed:
            self.Warning.renamed_vars(', '.join(renamed))
        return self.merge_data(domain, values)

    def curry_merge(self, table_key, atr_type, ids=None, selection=False):
        if self.rowwise:
            check_equality = self.arrays_equal_rows
        else:
            check_equality = self.arrays_equal_cols

        def inner(new_atrs, atr):
            """
            Atrs - list of variables we wish to merge
            new_atrs - dictionary where key is old var, val
                is [is_different:bool, table_keys:list]), is_different is set to True,
                if we are outputing duplicates, but the value is arbitrary
            """
            if atr in new_atrs:
                if not selection and self.output_duplicates:
                    #if output_duplicates, we just check if compute value is the same
                    new_atrs[atr][0] = True
                elif not new_atrs[atr][0]:
                    for var, key in new_atrs[atr][1]:
                        if not check_equality(table_key,
                                              key,
                                              atr.name,
                                              self.atr_vals[atr_type],
                                              type(var), ids):
                            new_atrs[atr][0] = True
                            break
                new_atrs[atr][1].append((atr, table_key))
            else:
                new_atrs[atr] = [False, [(atr, table_key)]]
            return new_atrs
        return inner

    def arrays_equal_rows(self, key1, key2, name, data_type, type_, ids):
        #gets masks, compares same as cols
        t1 = self.data[key1].table
        t2 = self.data[key2].table
        inter_val = set(ids[key1]) & set(ids[key2])
        t1_inter = [ids[key1][val] for val in inter_val]
        t2_inter = [ids[key2][val] for val in inter_val]
        return arrays_equal(
            getattr(t1[t1_inter, name],
                    data_type).reshape(-1, 1),
            getattr(t2[t2_inter, name],
                    data_type).reshape(-1, 1),
            type_)

    def arrays_equal_cols(self, key1, key2, name, data_type, type_, _ids=None):
        return arrays_equal(
            getattr(self.data[key1].table[:, name],
                    data_type),
            getattr(self.data[key2].table[:, name],
                    data_type),
            type_)

    def create_from_columns(self, columns, relevant_keys, get_selected):
        """
        Columns are duplicated only if values differ (even
        if only in order of values), origin table name and input slot is added to column name.
        """
        var_dict = {}
        for atr_type in self.atr_types:
            container = {}
            for table_key in relevant_keys:
                table = self.data[table_key].table
                if atr_type == 'attributes':
                    if get_selected:
                        atrs = list(compress(table.domain.attributes,
                                             [c.name in columns for c in table.domain.attributes]))
                    else:
                        atrs = getattr(table.domain, atr_type)
                else:
                    atrs = getattr(table.domain, atr_type)
                merge_vars = self.curry_merge(table_key, atr_type)
                container = reduce(merge_vars, atrs, container)
            var_dict[atr_type] = container

        if get_selected:
            annotated = self.extract_columnwise(var_dict, None)
        else:
            annotated = self.extract_columnwise(var_dict, columns)

        return annotated

    def extract_rowwise(self, var_dict, ids=None, selection=False):
        """
        keys : ['attributes', 'metas', 'class_vars']
        vals: new_atrs - dictionary where key is old name, val
            is [is_different:bool, table_keys:list])
        ids: dict with ids for each table
        """
        all_ids = sorted(reduce(set.union, [set(val) for val in ids.values()], set()))

        permutations = {}
        for table_key, dict_ in ids.items():
            permutations[table_key] = get_perm(list(dict_), all_ids)

        domain = {type_ : [] for type_ in self.atr_types}
        values = defaultdict(list)
        renamed = []
        for atr_type, vars_dict in var_dict.items():
            for var_name, var_data in vars_dict.items():
                different = var_data[0]
                if different:
                    # Columns are different, copy and rename them.
                    # Renaming is done here to mark appropriately the source table.
                    # Additional strange clashes are checked later in merge_data
                    for var, table_key in var_data[1]:
                        temp = self.data[table_key].table
                        idx = list(self.data).index(table_key) + 1
                        domain[atr_type].append(var.copy(name='{} ({})'.format(var_name, idx)))
                        renamed.append(var_name.name)
                        v = getattr(temp[list(ids[table_key].values()), var_name],
                                    self.atr_vals[atr_type])
                        perm = permutations[table_key]
                        if len(v) < len(all_ids):
                            values[atr_type].append(pad_columns(v, perm, len(all_ids)))
                        else:
                            values[atr_type].append(v[perm].reshape(-1, 1))
                else:
                    value = np.full((len(all_ids), 1), np.nan)
                    domain[atr_type].append(var_data[1][0][0].copy())
                    for _, table_key in var_data[1]:
                        #different tables have different part of the same attribute vector
                        perm = permutations[table_key]
                        v = getattr(self.data[table_key].table[list(ids[table_key].values()),
                                                               var_name],
                                    self.atr_vals[atr_type]).reshape(-1, 1)
                        value = value.astype(v.dtype, copy=False)
                        value[perm] = v
                    values[atr_type].append(value)

        if renamed:
            self.Warning.renamed_vars(', '.join(renamed))
        ids = None if self.selected_feature else np.array(all_ids)
        table = self.merge_data(domain, values, ids)
        if selection:
            mask = [idx in self.selected_items for idx in all_ids]
            return create_annotated_table(table, mask)
        return table

    def get_indices(self, table, selection):
        """Returns mappings of ids (be it row id or string) to indices in tables"""
        if self.selected_feature:
            if self.output_duplicates and selection:
                items, inverse = np.unique(getattr(table[:, self.selected_feature], 'metas'),
                                           return_inverse=True)
                ids = [np.nonzero(inverse == idx)[0] for idx in range(len(items))]
            else:
                items, ids = np.unique(getattr(table[:, self.selected_feature], 'metas'),
                                       return_index=True)

        else:
            items = table.ids
            ids = range(len(table))

        if selection:
            return {item: idx for item, idx in zip(items, ids)
                    if item in self.selected_items}

        return dict(zip(items, ids))

    def get_indices_to_match_by(self, relevant_keys, selection=False):
        dict_ = {}
        for key in relevant_keys:
            table = self.data[key].table
            dict_[key] = self.get_indices(table, selection)
        return dict_

    def create_from_rows(self, relevant_ids, selection=False):
        var_dict = {}
        for atr_type in self.atr_types:
            container = {}
            for table_key in relevant_ids:
                merge_vars = self.curry_merge(table_key, atr_type, relevant_ids, selection)
                atrs = getattr(self.data[table_key].table.domain, atr_type)
                container = reduce(merge_vars, atrs, container)
            var_dict[atr_type] = container
        if self.output_duplicates and not selection:
            return self.extract_rowwise_duplicates(var_dict, relevant_ids)
        return self.extract_rowwise(var_dict, relevant_ids, selection)

    def expand_table(self, table, atrs, metas, cv):
        exp = []
        n = 1 if isinstance(table, RowInstance) else len(table)
        if isinstance(table, RowInstance):
            ids = table.id.reshape(-1, 1)
            atr_vals = self.row_vals
        else:
            ids = table.ids.reshape(-1, 1)
            atr_vals = self.atr_vals
        for all_el, atr_type in zip([atrs, metas, cv], self.atr_types):
            cur_el = getattr(table.domain, atr_type)
            array = np.full((n, len(all_el)), np.nan)
            if cur_el:
                perm = get_perm(cur_el, all_el)
                b = getattr(table, atr_vals[atr_type]).reshape(len(array), len(perm))
                array = array.astype(b.dtype, copy=False)
                array[:, perm] = b
            exp.append(array)
        return (*exp, ids)

    def extract_rowwise_duplicates(self, var_dict, ids):
        all_ids = sorted(reduce(set.union, [set(val) for val in ids.values()], set()))
        sort_key = attrgetter("name")
        all_atrs = sorted(var_dict['attributes'], key=sort_key)
        all_metas = sorted(var_dict['metas'], key=sort_key)
        all_cv = sorted(var_dict['class_vars'], key=sort_key)

        all_x, all_y, all_m = [], [], []
        new_table_ids = []
        for idx in all_ids:
            #iterate trough tables with same idx
            for table_key, t_indices in ids.items():
                if idx not in t_indices:
                    continue
                map_ = t_indices[idx]
                extracted = self.data[table_key].table[map_]
                # pylint: disable=unbalanced-tuple-unpacking
                x, m, y, t_ids = self.expand_table(extracted, all_atrs, all_metas, all_cv)
                all_x.append(x)
                all_y.append(y)
                all_m.append(m)
                new_table_ids.append(t_ids)
        domain = {'attributes': all_atrs, 'metas': all_metas, 'class_vars': all_cv}
        values = {'attributes': [np.vstack(all_x)],
                  'metas': [np.vstack(all_m)],
                  'class_vars': [np.vstack(all_y)]}
        return self.merge_data(domain, values, np.vstack(new_table_ids))

    def commit(self):
        if not self.vennwidget.vennareas() or not self.data:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            return

        self.selected_items = reduce(
            set.union, [self.disjoint[index] for index in self.selection],
            set()
        )
        selected_keys = reduce(
            set.union, [set(self.area_keys[area]) for area in self.selection],
            set())
        selected = None

        if self.rowwise:
            if self.selected_items:
                selected_ids = self.get_indices_to_match_by(
                    selected_keys, bool(self.selection))
                selected = self.create_from_rows(selected_ids, False)
            annotated_ids = self.get_indices_to_match_by(self.data)
            annotated = self.create_from_rows(annotated_ids, True)
        else:
            annotated = self.create_from_columns(self.selected_items, self.data, False)
            if self.selected_items:
                selected = self.create_from_columns(self.selected_items, selected_keys, True)

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)

    def send_report(self):
        self.report_plot()

    def get_disjoint(self, sets):
        """
        Return all disjoint subsets.
        """
        sets = list(sets)
        n = len(sets)
        disjoint_sets = [None] * (2 ** n)
        included_tables = [None] * (2 ** n)
        for i in range(2 ** n):
            key = setkey(i, n)
            included = [s for s, inc in zip(sets, key) if inc]
            if included:
                excluded = [s for s, inc in zip(sets, key) if not inc]
                s = reduce(set.intersection, included)
                s = reduce(set.difference, excluded, s)
            else:
                s = set()
            disjoint_sets[i] = s
            included_tables[i] = [k for k, inc in zip(self.data, key) if inc]

        return disjoint_sets, included_tables
Exemplo n.º 5
0
class OWPythagoreanForest(OWWidget):
    name = 'Pythagorean Forest'
    description = 'Pythagorean forest for visualising random forests.'
    icon = 'icons/PythagoreanForest.svg'

    priority = 1001

    inputs = [('Random forest', RandomForestModel, 'set_rf')]
    outputs = [('Tree', TreeModel)]

    # Enable the save as feature
    graph_name = 'scene'

    # Settings
    depth_limit = settings.ContextSetting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    zoom = settings.Setting(50)
    selected_tree_index = settings.ContextSetting(-1)

    def __init__(self):
        super().__init__()
        self.model = None
        self.forest_adapter = None
        self.instances = None
        self.clf_dataset = None
        # We need to store refernces to the trees and grid items
        self.grid_items, self.ptrees = [], []
        # In some rare cases, we need to prevent commiting, the only one
        # that this currently helps is that when changing the size calculation
        # the trees are all recomputed, but we don't want to output a new tree
        # to keep things consistent with other ui controls.
        self.__prevent_commit = False

        self.color_palette = None

        # Different methods to calculate the size of squares
        self.SIZE_CALCULATION = [
            ('Normal', lambda x: x),
            ('Square root', lambda x: sqrt(x)),
            ('Logarithmic', lambda x: log(x + 1)),
        ]

        # CONTROL AREA
        # Tree info area
        box_info = gui.widgetBox(self.controlArea, 'Forest')
        self.ui_info = gui.widgetLabel(box_info)

        # Display controls area
        box_display = gui.widgetBox(self.controlArea, 'Display')
        self.ui_depth_slider = gui.hSlider(
            box_display, self, 'depth_limit', label='Depth', ticks=False,
            callback=self.update_depth)
        self.ui_target_class_combo = gui.comboBox(
            box_display, self, 'target_class_index', label='Target class',
            orientation=Qt.Horizontal, items=[], contentsLength=8,
            callback=self.update_colors)
        self.ui_size_calc_combo = gui.comboBox(
            box_display, self, 'size_calc_idx', label='Size',
            orientation=Qt.Horizontal,
            items=list(zip(*self.SIZE_CALCULATION))[0], contentsLength=8,
            callback=self.update_size_calc)
        self.ui_zoom_slider = gui.hSlider(
            box_display, self, 'zoom', label='Zoom', ticks=False, minValue=20,
            maxValue=150, callback=self.zoom_changed, createLabel=False)

        # Stretch to fit the rest of the unsused area
        gui.rubber(self.controlArea)

        self.controlArea.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Expanding)

        # MAIN AREA
        self.scene = QGraphicsScene(self)
        self.scene.selectionChanged.connect(self.commit)
        self.grid = OWGrid()
        self.grid.geometryChanged.connect(self._update_scene_rect)
        self.scene.addItem(self.grid)

        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOn)
        self.mainArea.layout().addWidget(self.view)

        self.resize(800, 500)

        self.clear()

    def set_rf(self, model=None):
        """When a different forest is given."""
        self.clear()
        self.model = model

        if model is not None:
            self.forest_adapter = self._get_forest_adapter(self.model)
            self._draw_trees()
            self.color_palette = self.forest_adapter.get_trees()[0]

            self.instances = model.instances
            # this bit is important for the regression classifier
            if self.instances is not None and self.instances.domain != model.domain:
                self.clf_dataset = self.instances.transform(self.model.domain)
            else:
                self.clf_dataset = self.instances

            self._update_info_box()
            self._update_target_class_combo()
            self._update_depth_slider()

            self.selected_tree_index = -1

    def clear(self):
        """Clear all relevant data from the widget."""
        self.model = None
        self.forest_adapter = None
        self.ptrees = []
        self.grid_items = []
        self.grid.clear()

        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()

    def update_depth(self):
        """When the max depth slider is changed."""
        for tree in self.ptrees:
            tree.set_depth_limit(self.depth_limit)

    def update_colors(self):
        """When the target class or coloring method is changed."""
        for tree in self.ptrees:
            tree.target_class_changed(self.target_class_index)

    def update_size_calc(self):
        """When the size calculation of the trees is changed."""
        if self.model is not None:
            with self._prevent_commit():
                self.grid.clear()
                self._draw_trees()
                # Keep the selected item
                if self.selected_tree_index != -1:
                    self.grid_items[self.selected_tree_index].setSelected(True)
                self.update_depth()

    def zoom_changed(self):
        """When we update the "Zoom" slider."""
        for item in self.grid_items:
            item.set_max_size(self._calculate_zoom(self.zoom))

        width = (self.view.width() - self.view.verticalScrollBar().width())
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

    @contextmanager
    def _prevent_commit(self):
        try:
            self.__prevent_commit = True
            yield
        finally:
            self.__prevent_commit = False

    def _update_info_box(self):
        self.ui_info.setText('Trees: {}'.format(len(self.forest_adapter.get_trees())))

    def _update_depth_slider(self):
        self.depth_limit = self._get_max_depth()

        self.ui_depth_slider.parent().setEnabled(True)
        self.ui_depth_slider.setMaximum(self.depth_limit)
        self.ui_depth_slider.setValue(self.depth_limit)

    def _clear_info_box(self):
        self.ui_info.setText('No forest on input.')

    def _clear_target_class_combo(self):
        self.ui_target_class_combo.clear()
        self.target_class_index = 0
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _clear_depth_slider(self):
        self.ui_depth_slider.parent().setEnabled(False)
        self.ui_depth_slider.setMaximum(0)

    def _get_max_depth(self):
        return max(tree.tree_adapter.max_depth for tree in self.ptrees)

    def _get_forest_adapter(self, model):
        return SklRandomForestAdapter(model)

    @contextmanager
    def disable_ui(self):
        """Temporarly disable the UI while trees may be redrawn."""
        try:
            self.ui_size_calc_combo.setEnabled(False)
            self.ui_depth_slider.setEnabled(False)
            self.ui_target_class_combo.setEnabled(False)
            self.ui_zoom_slider.setEnabled(False)
            yield
        finally:
            self.ui_size_calc_combo.setEnabled(True)
            self.ui_depth_slider.setEnabled(True)
            self.ui_target_class_combo.setEnabled(True)
            self.ui_zoom_slider.setEnabled(True)

    def _draw_trees(self):
        self.grid_items, self.ptrees = [], []

        num_trees = len(self.forest_adapter.get_trees())
        with self.progressBar(num_trees) as prg, self.disable_ui():
            for tree in self.forest_adapter.get_trees():
                ptree = PythagorasTreeViewer(
                    None, tree, interactive=False, padding=100,
                    target_class_index=self.target_class_index,
                    weight_adjustment=self.SIZE_CALCULATION[self.size_calc_idx][1]
                )
                grid_item = GridItem(
                    ptree, self.grid, max_size=self._calculate_zoom(self.zoom)
                )
                # We don't want to show flickering while the trees are being
                grid_item.setVisible(False)

                self.grid_items.append(grid_item)
                self.ptrees.append(ptree)
                prg.advance()

            self.grid.set_items(self.grid_items)
            # This is necessary when adding items for the first time
            if self.grid:
                width = (self.view.width() - self.view.verticalScrollBar().width())
                self.grid.reflow(width)
                self.grid.setPreferredWidth(width)
                # After drawing is complete, we show the trees
                for grid_item in self.grid_items:
                    grid_item.setVisible(True)

    @staticmethod
    def _calculate_zoom(zoom_level):
        """Calculate the max size for grid items from zoom level setting."""
        return zoom_level * 5

    def onDeleteWidget(self):
        """When deleting the widget."""
        super().onDeleteWidget()
        self.clear()

    def commit(self):
        """Commit the selected tree to output."""
        if self.__prevent_commit:
            return

        if not self.scene.selectedItems():
            self.send('Tree', None)
            # The selected tree index should only reset when model changes
            if self.model is None:
                self.selected_tree_index = -1
            return

        selected_item = self.scene.selectedItems()[0]
        self.selected_tree_index = self.grid_items.index(selected_item)
        tree = self.model.trees[self.selected_tree_index]
        tree.instances = self.instances
        tree.meta_target_class_index = self.target_class_index
        tree.meta_size_calc_idx = self.size_calc_idx
        tree.meta_depth_limit = self.depth_limit

        self.send('Tree', tree)

    def send_report(self):
        """Send report."""
        self.report_plot()

    def _update_scene_rect(self):
        self.scene.setSceneRect(self.scene.itemsBoundingRect())

    def _update_target_class_combo(self):
        self._clear_target_class_combo()
        label = [x for x in self.ui_target_class_combo.parent().children()
                 if isinstance(x, QLabel)][0]

        if self.instances.domain.has_discrete_class:
            label_text = 'Target class'
            values = [c.title() for c in self.instances.domain.class_vars[0].values]
            values.insert(0, 'None')
        else:
            label_text = 'Node color'
            values = list(ContinuousTreeNode.COLOR_METHODS.keys())
        label.setText(label_text)
        self.ui_target_class_combo.addItems(values)
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def resizeEvent(self, ev):
        width = (self.view.width() - self.view.verticalScrollBar().width())
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

        super().resizeEvent(ev)