class OWPythagoreanForest(OWWidget):
    """Show every tree of a random forest as a Pythagoras tree in a grid.

    The widget receives a random forest model on the 'Random forest' input,
    draws one grid item per tree, and sends the tree that is selected in the
    scene to the 'Tree' output.
    """

    name = 'Pythagorean Forest'
    description = 'Pythagorean forest for visualising random forests.'
    icon = 'icons/PythagoreanForest.svg'

    priority = 1001

    inputs = [('Random forest', RandomForestModel, 'set_rf')]
    outputs = [('Tree', TreeModel)]

    # Enable the save as feature
    graph_name = 'scene'

    # Settings
    depth_limit = settings.ContextSetting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    zoom = settings.Setting(50)
    selected_tree_index = settings.ContextSetting(-1)

    def __init__(self):
        super().__init__()
        self.model = None
        self.forest_adapter = None
        self.instances = None
        self.clf_dataset = None
        # We need to store references to the trees and grid items
        self.grid_items, self.ptrees = [], []
        # In some rare cases, we need to prevent committing. The only case
        # this currently helps with is changing the size calculation: the
        # trees are all recomputed, but we don't want to output a new tree,
        # to keep things consistent with other ui controls.
        self.__prevent_commit = False

        self.color_palette = None

        # Different methods to calculate the size of squares
        self.SIZE_CALCULATION = [
            ('Normal', lambda x: x),
            ('Square root', lambda x: sqrt(x)),
            ('Logarithmic', lambda x: log(x + 1)),
        ]

        # CONTROL AREA
        # Tree info area
        box_info = gui.widgetBox(self.controlArea, 'Forest')
        self.ui_info = gui.widgetLabel(box_info)

        # Display controls area
        box_display = gui.widgetBox(self.controlArea, 'Display')
        self.ui_depth_slider = gui.hSlider(
            box_display, self, 'depth_limit', label='Depth', ticks=False,
            callback=self.update_depth)
        self.ui_target_class_combo = gui.comboBox(
            box_display, self, 'target_class_index', label='Target class',
            orientation=Qt.Horizontal, items=[], contentsLength=8,
            callback=self.update_colors)
        self.ui_size_calc_combo = gui.comboBox(
            box_display, self, 'size_calc_idx', label='Size',
            orientation=Qt.Horizontal,
            items=list(zip(*self.SIZE_CALCULATION))[0], contentsLength=8,
            callback=self.update_size_calc)
        self.ui_zoom_slider = gui.hSlider(
            box_display, self, 'zoom', label='Zoom', ticks=False,
            minValue=20, maxValue=150, callback=self.zoom_changed,
            createLabel=False)

        # Stretch to fit the rest of the unused area
        gui.rubber(self.controlArea)

        self.controlArea.setSizePolicy(
            QSizePolicy.Preferred, QSizePolicy.Expanding)

        # MAIN AREA
        self.scene = QGraphicsScene(self)
        self.scene.selectionChanged.connect(self.commit)
        self.grid = OWGrid()
        self.grid.geometryChanged.connect(self._update_scene_rect)
        self.scene.addItem(self.grid)

        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOn)
        self.mainArea.layout().addWidget(self.view)

        self.resize(800, 500)

        self.clear()

    def set_rf(self, model=None):
        """When a different forest is given.

        Parameters
        ----------
        model : RandomForestModel or None
            The new forest; ``None`` clears the widget.
        """
        self.clear()
        self.model = model

        if model is not None:
            self.forest_adapter = self._get_forest_adapter(self.model)
            self._draw_trees()
            # NOTE(review): the "palette" is the first tree adapter and is
            # not read anywhere else in this class -- confirm the intent.
            trees = self.forest_adapter.get_trees()
            self.color_palette = trees[0] if trees else None

            self.instances = model.instances
            # this bit is important for the regression classifier
            if self.instances is not None and \
                    self.instances.domain != model.domain:
                self.clf_dataset = Table.from_table(
                    self.model.domain, self.instances)
            else:
                self.clf_dataset = self.instances

            self._update_info_box()
            self._update_target_class_combo()
            self._update_depth_slider()

        self.selected_tree_index = -1

    def clear(self):
        """Clear all relevant data from the widget."""
        self.model = None
        self.forest_adapter = None
        # Drop references that belong to the previous model as well, so no
        # stale data outlives the model it came from.
        self.instances = None
        self.clf_dataset = None
        self.color_palette = None
        self.ptrees = []
        self.grid_items = []
        self.grid.clear()

        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()

    def update_depth(self):
        """When the max depth slider is changed."""
        for tree in self.ptrees:
            tree.set_depth_limit(self.depth_limit)

    def update_colors(self):
        """When the target class or coloring method is changed."""
        for tree in self.ptrees:
            tree.target_class_changed(self.target_class_index)

    def update_size_calc(self):
        """When the size calculation of the trees is changed."""
        if self.model is None:
            return
        # Redrawing changes the scene selection; suppress the resulting
        # commits so no new tree is sent to the output.
        with self._prevent_commit():
            self.grid.clear()
            self._draw_trees()
            # Keep the selected item
            if self.selected_tree_index != -1:
                self.grid_items[self.selected_tree_index].setSelected(True)
            self.update_depth()

    def zoom_changed(self):
        """When we update the "Zoom" slider."""
        max_size = self._calculate_zoom(self.zoom)
        for item in self.grid_items:
            item.set_max_size(max_size)

        self._reflow_grid()

    @contextmanager
    def _prevent_commit(self):
        """Context manager that makes `commit` a no-op while it is active."""
        try:
            self.__prevent_commit = True
            yield
        finally:
            self.__prevent_commit = False

    def _reflow_grid(self):
        """Lay the grid out to the view width minus the vertical scrollbar."""
        width = self.view.width() - self.view.verticalScrollBar().width()
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

    def _update_info_box(self):
        """Show the number of trees in the info label."""
        self.ui_info.setText('Trees: {}'.format(
            len(self.forest_adapter.get_trees())))

    def _update_depth_slider(self):
        """Enable the depth slider and set it to the deepest tree's depth."""
        self.depth_limit = self._get_max_depth()

        self.ui_depth_slider.parent().setEnabled(True)
        self.ui_depth_slider.setMaximum(self.depth_limit)
        self.ui_depth_slider.setValue(self.depth_limit)

    def _clear_info_box(self):
        self.ui_info.setText('No forest on input.')

    def _clear_target_class_combo(self):
        self.ui_target_class_combo.clear()
        self.target_class_index = 0
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _clear_depth_slider(self):
        self.ui_depth_slider.parent().setEnabled(False)
        self.ui_depth_slider.setMaximum(0)

    def _get_max_depth(self):
        """Return the largest depth among the drawn trees (0 if none)."""
        return max(
            (tree.tree_adapter.max_depth for tree in self.ptrees), default=0)

    def _get_forest_adapter(self, model):
        return SklRandomForestAdapter(model)

    @contextmanager
    def disable_ui(self):
        """Temporarily disable the UI while trees may be redrawn."""
        controls = (
            self.ui_size_calc_combo,
            self.ui_depth_slider,
            self.ui_target_class_combo,
            self.ui_zoom_slider,
        )
        try:
            for control in controls:
                control.setEnabled(False)
            yield
        finally:
            for control in controls:
                control.setEnabled(True)

    def _draw_trees(self):
        """Create a tree viewer and a grid item for every tree of the forest."""
        self.grid_items, self.ptrees = [], []

        trees = self.forest_adapter.get_trees()
        with self.progressBar(len(trees)) as prg, self.disable_ui():
            for tree in trees:
                ptree = PythagorasTreeViewer(
                    None, tree, interactive=False, padding=100,
                    target_class_index=self.target_class_index,
                    weight_adjustment=self.SIZE_CALCULATION[
                        self.size_calc_idx][1])
                grid_item = GridItem(
                    ptree, self.grid,
                    max_size=self._calculate_zoom(self.zoom))
                # We don't want to show flickering while the trees are being
                # added, so keep each item hidden until all of them are laid
                # out.
                grid_item.setVisible(False)

                self.grid_items.append(grid_item)
                self.ptrees.append(ptree)
                prg.advance()

            self.grid.set_items(self.grid_items)
            # This is necessary when adding items for the first time
            if self.grid:
                self._reflow_grid()
            # After drawing is complete, we show the trees
            for grid_item in self.grid_items:
                grid_item.setVisible(True)

    @staticmethod
    def _calculate_zoom(zoom_level):
        """Calculate the max size for grid items from zoom level setting."""
        return zoom_level * 5

    def onDeleteWidget(self):
        """When deleting the widget."""
        super().onDeleteWidget()
        self.clear()

    def commit(self):
        """Commit the selected tree to output."""
        if self.__prevent_commit:
            return

        selected = self.scene.selectedItems()
        if not selected:
            self.send('Tree', None)
            # The selected tree index should only reset when model changes
            if self.model is None:
                self.selected_tree_index = -1
            return

        self.selected_tree_index = self.grid_items.index(selected[0])
        tree = self.model.trees[self.selected_tree_index]
        # Attach the data and the current display settings to the tree
        # before sending it.
        tree.instances = self.instances
        tree.meta_target_class_index = self.target_class_index
        tree.meta_size_calc_idx = self.size_calc_idx
        tree.meta_depth_limit = self.depth_limit

        self.send('Tree', tree)

    def send_report(self):
        """Send report."""
        self.report_plot()

    def _update_scene_rect(self):
        self.scene.setSceneRect(self.scene.itemsBoundingRect())

    def _update_target_class_combo(self):
        """Fill the target combo according to the type of the class variable."""
        self._clear_target_class_combo()

        # `set_rf` accepts models without instances, so fall back to the
        # model's own domain instead of dereferencing None.
        if self.instances is not None:
            domain = self.instances.domain
        else:
            domain = self.model.domain

        if domain.has_discrete_class:
            label_text = 'Target class'
            values = ['None']
            values.extend(c.title() for c in domain.class_vars[0].values)
        else:
            label_text = 'Node color'
            values = list(ContinuousTreeNode.COLOR_METHODS.keys())

        # The combo's caption is the QLabel sibling inside its parent box.
        label = next(
            (child
             for child in self.ui_target_class_combo.parent().children()
             if isinstance(child, QLabel)),
            None)
        if label is not None:
            label.setText(label_text)

        self.ui_target_class_combo.addItems(values)
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def resizeEvent(self, ev):
        """Reflow the grid to the new view width, then defer to the base."""
        self._reflow_grid()

        super().resizeEvent(ev)
class OWPythagoreanForest(OWWidget):
    """Show every tree of a random forest as a Pythagoras tree in a grid.

    The widget receives a random forest model on the 'Random forest' input,
    draws one grid item per tree, and sends the tree that is selected in the
    scene to the 'Tree' output. Logic that differs between classification
    and regression forests is dispatched through `_type_specific`.
    """

    name = 'Pythagorean Forest'
    description = 'Pythagorean forest for visualising random forests.'
    icon = 'icons/PythagoreanForest.svg'

    priority = 1001

    inputs = [('Random forest', RandomForestModel, 'set_rf')]
    outputs = [('Tree', TreeModel)]

    # Enable the save as feature
    graph_name = 'scene'

    # Settings
    depth_limit = settings.ContextSetting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    size_log_scale = settings.Setting(2)
    zoom = settings.Setting(50)
    selected_tree_index = settings.ContextSetting(-1)

    CLASSIFICATION, REGRESSION = range(2)

    def __init__(self):
        super().__init__()
        # Instance variables
        self.forest_type = self.CLASSIFICATION
        self.model = None
        self.forest_adapter = None
        self.dataset = None
        self.clf_dataset = None
        # We need to store references to the trees and grid items
        self.grid_items, self.ptrees = [], []

        self.color_palette = None

        # Different methods to calculate the size of squares
        self.SIZE_CALCULATION = [
            ('Normal', lambda x: x),
            ('Square root', lambda x: sqrt(x)),
            ('Logarithmic', lambda x: log(x * self.size_log_scale)),
        ]
        # Node coloring methods offered for regression forests; each entry
        # is called with (adapter, tree_node).
        self.REGRESSION_COLOR_CALC = [
            ('None', lambda _, __: QColor(255, 255, 255)),
            ('Class mean', self._color_class_mean),
            ('Standard deviation', self._color_stddev),
        ]

        # CONTROL AREA
        # Tree info area
        box_info = gui.widgetBox(self.controlArea, 'Forest')
        self.ui_info = gui.widgetLabel(box_info, label='')

        # Display controls area
        box_display = gui.widgetBox(self.controlArea, 'Display')
        self.ui_depth_slider = gui.hSlider(
            box_display, self, 'depth_limit', label='Depth', ticks=False,
            callback=self.max_depth_changed)
        self.ui_target_class_combo = gui.comboBox(
            box_display, self, 'target_class_index', label='Target class',
            orientation=Qt.Horizontal, items=[], contentsLength=8,
            callback=self.target_colors_changed)
        self.ui_size_calc_combo = gui.comboBox(
            box_display, self, 'size_calc_idx', label='Size',
            orientation=Qt.Horizontal,
            items=list(zip(*self.SIZE_CALCULATION))[0], contentsLength=8,
            callback=self.size_calc_changed)
        self.ui_zoom_slider = gui.hSlider(
            box_display, self, 'zoom', label='Zoom', ticks=False,
            minValue=20, maxValue=150, callback=self.zoom_changed,
            createLabel=False)

        # Stretch to fit the rest of the unused area
        gui.rubber(self.controlArea)

        self.controlArea.setSizePolicy(
            QSizePolicy.Preferred, QSizePolicy.Expanding)

        # MAIN AREA
        self.scene = QGraphicsScene(self)
        self.scene.selectionChanged.connect(self.commit)
        self.grid = OWGrid()
        self.grid.geometryChanged.connect(self._update_scene_rect)
        self.scene.addItem(self.grid)

        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOn)
        self.mainArea.layout().addWidget(self.view)

        self.resize(800, 500)

        self.clear()

    def set_rf(self, model=None):
        """When a different forest is given.

        Parameters
        ----------
        model : RandomForestModel or None
            The new forest; ``None`` clears the widget.

        Raises
        ------
        RuntimeError
            If the model is neither a classification nor a regression
            forest.
        """
        self.clear()
        self.model = model

        if model is not None:
            if isinstance(model, RandomForestClassifier):
                self.forest_type = self.CLASSIFICATION
            elif isinstance(model, RandomForestRegressor):
                self.forest_type = self.REGRESSION
            else:
                # Do not keep a model the widget cannot display.
                self.model = None
                raise RuntimeError('Invalid type of forest.')

            self.forest_adapter = self._get_forest_adapter(self.model)
            self.color_palette = self._type_specific('_get_color_palette')()
            self._draw_trees()

            self.dataset = model.instances
            # this bit is important for the regression classifier
            if self.dataset is not None and \
                    self.dataset.domain != model.domain:
                self.clf_dataset = Table.from_table(
                    self.model.domain, self.dataset)
            else:
                self.clf_dataset = self.dataset

            self._update_info_box()
            self._type_specific('_update_target_class_combo')()
            self._update_depth_slider()

        self.selected_tree_index = -1

    def clear(self):
        """Clear all relevant data from the widget."""
        self.model = None
        self.forest_adapter = None
        # Drop references that belong to the previous model as well, so no
        # stale data outlives the model it came from.
        self.dataset = None
        self.clf_dataset = None
        self.color_palette = None
        self.ptrees = []
        self.grid_items = []
        self.grid.clear()

        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()

    # CONTROL AREA CALLBACKS
    def max_depth_changed(self):
        """When the max depth slider is changed."""
        for tree in self.ptrees:
            tree.set_depth_limit(self.depth_limit)

    def target_colors_changed(self):
        """When the target class or coloring method is changed."""
        for tree in self.ptrees:
            tree.target_class_has_changed()

    def size_calc_changed(self):
        """When the size calculation of the trees is changed."""
        if self.model is None:
            return
        # NOTE(review): the selected SIZE_CALCULATION function is not passed
        # to the adapter or the viewers anywhere visible here -- confirm how
        # the new size calculation reaches the trees.
        self.forest_adapter = self._get_forest_adapter(self.model)
        self.grid.clear()
        self._draw_trees()
        # Keep the selected item
        if self.selected_tree_index != -1:
            self.grid_items[self.selected_tree_index].setSelected(True)
        self.max_depth_changed()

    def zoom_changed(self):
        """When we update the "Zoom" slider."""
        max_size = self._calculate_zoom(self.zoom)
        for item in self.grid_items:
            item.set_max_size(max_size)

        self._reflow_grid()

    # MODEL CHANGED METHODS
    def _update_info_box(self):
        """Show the number of trees in the info label."""
        self.ui_info.setText(
            'Trees: {}'.format(len(self.forest_adapter.get_trees())))

    def _update_depth_slider(self):
        """Enable the depth slider and set it to the deepest tree's depth."""
        self.depth_limit = self._get_max_depth()

        self.ui_depth_slider.parent().setEnabled(True)
        self.ui_depth_slider.setMaximum(self.depth_limit)
        self.ui_depth_slider.setValue(self.depth_limit)

    # MODEL CLEARED METHODS
    def _clear_info_box(self):
        self.ui_info.setText('No forest on input.')

    def _clear_target_class_combo(self):
        self.ui_target_class_combo.clear()
        self.target_class_index = 0
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _clear_depth_slider(self):
        self.ui_depth_slider.parent().setEnabled(False)
        self.ui_depth_slider.setMaximum(0)

    # HELPFUL METHODS
    def _get_max_depth(self):
        """Return the largest depth among the drawn trees (0 if none)."""
        return max(
            (tree.tree_adapter.max_depth for tree in self.ptrees), default=0)

    def _get_forest_adapter(self, model):
        return SklRandomForestAdapter(model)

    def _reflow_grid(self):
        """Lay the grid out to the view width minus the vertical scrollbar."""
        width = self.view.width() - self.view.verticalScrollBar().width()
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

    def _draw_trees(self):
        """Create a tree viewer and a grid item for every tree of the forest."""
        self.ui_size_calc_combo.setEnabled(False)
        try:
            self.grid_items, self.ptrees = [], []

            trees = self.forest_adapter.get_trees()
            node_color_func = self._type_specific('_get_node_color')
            with self.progressBar(len(trees)) as prg:
                for tree in trees:
                    ptree = PythagorasTreeViewer(
                        None, tree,
                        node_color_func=node_color_func,
                        interactive=False, padding=100)
                    self.grid_items.append(GridItem(
                        ptree, self.grid,
                        max_size=self._calculate_zoom(self.zoom)))
                    self.ptrees.append(ptree)
                    prg.advance()

            self.grid.set_items(self.grid_items)
            # This is necessary when adding items for the first time
            if self.grid:
                self._reflow_grid()
        finally:
            # Re-enable the combo even if building a tree failed; otherwise
            # the control would stay disabled for good.
            self.ui_size_calc_combo.setEnabled(True)

    @staticmethod
    def _calculate_zoom(zoom_level):
        """Calculate the max size for grid items from zoom level setting."""
        return zoom_level * 5

    def onDeleteWidget(self):
        """When deleting the widget."""
        super().onDeleteWidget()
        self.clear()

    def commit(self):
        """Commit the selected tree to output."""
        selected = self.scene.selectedItems()
        if not selected:
            self.send('Tree', None)
            # The selected tree index should only reset when model changes
            if self.model is None:
                self.selected_tree_index = -1
            return

        self.selected_tree_index = self.grid_items.index(selected[0])
        obj = self.model.trees[self.selected_tree_index]
        # Attach the data and the current display settings to the tree
        # before sending it.
        obj.instances = self.dataset
        obj.meta_target_class_index = self.target_class_index
        obj.meta_size_calc_idx = self.size_calc_idx
        obj.meta_size_log_scale = self.size_log_scale
        obj.meta_depth_limit = self.depth_limit

        self.send('Tree', obj)

    def send_report(self):
        """Send report."""
        self.report_plot()

    def _update_scene_rect(self):
        self.scene.setSceneRect(self.scene.itemsBoundingRect())

    def resizeEvent(self, ev):
        """Reflow the grid to the new view width, then defer to the base."""
        self._reflow_grid()

        super().resizeEvent(ev)

    def _type_specific(self, method):
        """A best effort method getter that somewhat separates logic specific
        to classification and regression trees.

        This relies on conventional naming of specific methods, e.g. a method
        name _get_tooltip would need to be defined like so:
        _classification_get_tooltip and _regression_get_tooltip, since they
        are both specific.

        Parameters
        ----------
        method : str
            Method name that we would like to call.

        Returns
        -------
        callable or None

        """
        if self.forest_type == self.CLASSIFICATION:
            return getattr(self, '_classification' + method)
        elif self.forest_type == self.REGRESSION:
            return getattr(self, '_regression' + method)
        else:
            return None

    # CLASSIFICATION FOREST SPECIFIC METHODS
    def _classification_update_target_class_combo(self):
        """Offer 'None' followed by the title-cased class values."""
        self._clear_target_class_combo()
        self.ui_target_class_combo.addItem('None')
        values = [c.title() for c in self.model.domain.class_vars[0].values]
        self.ui_target_class_combo.addItems(values)

    def _classification_get_color_palette(self):
        return [QColor(*c) for c in self.model.domain.class_var.colors]

    def _classification_get_node_color(self, adapter, tree_node):
        """Return the node color for the current target class selection.

        With a target class selected (index > 0; index 0 is 'None') the
        class color is lightened by that class's share of the node's
        distribution; otherwise the majority class color is used.
        """
        # this is taken almost directly from the existing classification tree
        # viewer
        colors = self.color_palette
        distribution = adapter.get_distribution(tree_node.label)[0]
        # Guard both branches against an all-zero distribution.
        total = np.sum(distribution) or 1

        if self.target_class_index:
            p = distribution[self.target_class_index - 1] / total
            color = colors[self.target_class_index - 1].lighter(200 - 100 * p)
        else:
            modus = np.argmax(distribution)
            p = distribution[modus] / total
            color = colors[int(modus)].lighter(400 - 300 * p)
        return color

    # REGRESSION FOREST SPECIFIC METHODS
    def _regression_update_target_class_combo(self):
        """Offer the names of the regression coloring methods."""
        self._clear_target_class_combo()
        self.ui_target_class_combo.addItems(
            list(zip(*self.REGRESSION_COLOR_CALC))[0])
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _regression_get_color_palette(self):
        return ContinuousPaletteGenerator(
            *self.forest_adapter.domain.class_var.colors)

    def _regression_get_node_color(self, adapter, tree_node):
        """Delegate to the coloring method selected in the target combo."""
        return self.REGRESSION_COLOR_CALC[self.target_class_index][1](
            adapter, tree_node)

    def _color_class_mean(self, adapter, tree_node):
        # calculate node colors relative to the mean of the node samples
        # NOTE(review): the ratio is undefined when all target values are
        # equal (max == min) -- confirm how the palette treats that case.
        min_mean = np.min(self.clf_dataset.Y)
        max_mean = np.max(self.clf_dataset.Y)
        instances = adapter.get_instances_in_nodes(self.clf_dataset,
                                                   tree_node.label)
        mean = np.mean(instances.Y)

        return self.color_palette[(mean - min_mean) / (max_mean - min_mean)]

    def _color_stddev(self, adapter, tree_node):
        # calculate node colors relative to the standard deviation in the node
        # samples
        # NOTE(review): the ratio is undefined when the dataset's standard
        # deviation is 0 -- confirm how the palette treats that case.
        min_mean, max_mean = 0, np.std(self.clf_dataset.Y)
        instances = adapter.get_instances_in_nodes(self.clf_dataset,
                                                   tree_node.label)
        std = np.std(instances.Y)

        return self.color_palette[(std - min_mean) / (max_mean - min_mean)]
class OWPythagoreanForest(OWWidget):
    """Show every tree of a random forest as a Pythagoras tree in a grid.

    The widget receives a random forest model on the 'Random forest' input,
    draws one grid item per tree, and sends the tree that is selected in the
    scene to the 'Tree' output. Logic that differs between classification
    and regression forests is dispatched through `_type_specific`.
    """

    name = 'Pythagorean Forest'
    description = 'Pythagorean forest for visualising random forests.'
    icon = 'icons/PythagoreanForest.svg'

    priority = 1001

    inputs = [('Random forest', RandomForestModel, 'set_rf')]
    outputs = [('Tree', TreeModel)]

    # Enable the save as feature
    graph_name = 'scene'

    # Settings
    depth_limit = settings.ContextSetting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    size_log_scale = settings.Setting(2)
    zoom = settings.Setting(50)
    selected_tree_index = settings.ContextSetting(-1)

    CLASSIFICATION, REGRESSION = range(2)

    def __init__(self):
        super().__init__()
        # Instance variables
        self.forest_type = self.CLASSIFICATION
        self.model = None
        self.forest_adapter = None
        self.dataset = None
        self.clf_dataset = None
        # We need to store references to the trees and grid items
        self.grid_items, self.ptrees = [], []

        self.color_palette = None

        # Different methods to calculate the size of squares
        self.SIZE_CALCULATION = [
            ('Normal', lambda x: x),
            ('Square root', lambda x: sqrt(x)),
            ('Logarithmic', lambda x: log(x * self.size_log_scale)),
        ]
        # Node coloring methods offered for regression forests; each entry
        # is called with (adapter, tree_node).
        self.REGRESSION_COLOR_CALC = [
            ('None', lambda _, __: QColor(255, 255, 255)),
            ('Class mean', self._color_class_mean),
            ('Standard deviation', self._color_stddev),
        ]

        # CONTROL AREA
        # Tree info area
        box_info = gui.widgetBox(self.controlArea, 'Forest')
        self.ui_info = gui.widgetLabel(box_info, label='')

        # Display controls area
        box_display = gui.widgetBox(self.controlArea, 'Display')
        self.ui_depth_slider = gui.hSlider(
            box_display, self, 'depth_limit', label='Depth', ticks=False,
            callback=self.max_depth_changed)
        self.ui_target_class_combo = gui.comboBox(
            box_display, self, 'target_class_index', label='Target class',
            orientation=Qt.Horizontal, items=[], contentsLength=8,
            callback=self.target_colors_changed)
        self.ui_size_calc_combo = gui.comboBox(
            box_display, self, 'size_calc_idx', label='Size',
            orientation=Qt.Horizontal,
            items=list(zip(*self.SIZE_CALCULATION))[0], contentsLength=8,
            callback=self.size_calc_changed)
        self.ui_zoom_slider = gui.hSlider(
            box_display, self, 'zoom', label='Zoom', ticks=False,
            minValue=20, maxValue=150, callback=self.zoom_changed,
            createLabel=False)

        # Stretch to fit the rest of the unused area
        gui.rubber(self.controlArea)

        self.controlArea.setSizePolicy(
            QSizePolicy.Preferred, QSizePolicy.Expanding)

        # MAIN AREA
        self.scene = QGraphicsScene(self)
        self.scene.selectionChanged.connect(self.commit)
        self.grid = OWGrid()
        self.grid.geometryChanged.connect(self._update_scene_rect)
        self.scene.addItem(self.grid)

        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOn)
        self.mainArea.layout().addWidget(self.view)

        self.resize(800, 500)

        self.clear()

    def set_rf(self, model=None):
        """When a different forest is given.

        Parameters
        ----------
        model : RandomForestModel or None
            The new forest; ``None`` clears the widget.

        Raises
        ------
        RuntimeError
            If the model is neither a classification nor a regression
            forest.
        """
        self.clear()
        self.model = model

        if model is not None:
            if isinstance(model, RandomForestClassifier):
                self.forest_type = self.CLASSIFICATION
            elif isinstance(model, RandomForestRegressor):
                self.forest_type = self.REGRESSION
            else:
                # Do not keep a model the widget cannot display.
                self.model = None
                raise RuntimeError('Invalid type of forest.')

            self.forest_adapter = self._get_forest_adapter(self.model)
            self.color_palette = self._type_specific('_get_color_palette')()
            self._draw_trees()

            self.dataset = model.instances
            # this bit is important for the regression classifier
            if self.dataset is not None and \
                    self.dataset.domain != model.domain:
                self.clf_dataset = Table.from_table(
                    self.model.domain, self.dataset)
            else:
                self.clf_dataset = self.dataset

            self._update_info_box()
            self._type_specific('_update_target_class_combo')()
            self._update_depth_slider()

        self.selected_tree_index = -1

    def clear(self):
        """Clear all relevant data from the widget."""
        self.model = None
        self.forest_adapter = None
        # Drop references that belong to the previous model as well, so no
        # stale data outlives the model it came from.
        self.dataset = None
        self.clf_dataset = None
        self.color_palette = None
        self.ptrees = []
        self.grid_items = []
        self.grid.clear()

        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()

    # CONTROL AREA CALLBACKS
    def max_depth_changed(self):
        """When the max depth slider is changed."""
        for tree in self.ptrees:
            tree.set_depth_limit(self.depth_limit)

    def target_colors_changed(self):
        """When the target class or coloring method is changed."""
        for tree in self.ptrees:
            tree.target_class_has_changed()

    def size_calc_changed(self):
        """When the size calculation of the trees is changed."""
        if self.model is None:
            return
        # NOTE(review): the selected SIZE_CALCULATION function is not passed
        # to the adapter or the viewers anywhere visible here -- confirm how
        # the new size calculation reaches the trees.
        self.forest_adapter = self._get_forest_adapter(self.model)
        self.grid.clear()
        self._draw_trees()
        # Keep the selected item
        if self.selected_tree_index != -1:
            self.grid_items[self.selected_tree_index].setSelected(True)
        self.max_depth_changed()

    def zoom_changed(self):
        """When we update the "Zoom" slider."""
        max_size = self._calculate_zoom(self.zoom)
        for item in self.grid_items:
            item.set_max_size(max_size)

        self._reflow_grid()

    # MODEL CHANGED METHODS
    def _update_info_box(self):
        """Show the number of trees in the info label."""
        self.ui_info.setText(
            'Trees: {}'.format(len(self.forest_adapter.get_trees())))

    def _update_depth_slider(self):
        """Enable the depth slider and set it to the deepest tree's depth."""
        self.depth_limit = self._get_max_depth()

        self.ui_depth_slider.parent().setEnabled(True)
        self.ui_depth_slider.setMaximum(self.depth_limit)
        self.ui_depth_slider.setValue(self.depth_limit)

    # MODEL CLEARED METHODS
    def _clear_info_box(self):
        self.ui_info.setText('No forest on input.')

    def _clear_target_class_combo(self):
        self.ui_target_class_combo.clear()
        self.target_class_index = 0
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _clear_depth_slider(self):
        self.ui_depth_slider.parent().setEnabled(False)
        self.ui_depth_slider.setMaximum(0)

    # HELPFUL METHODS
    def _get_max_depth(self):
        """Return the largest depth among the drawn trees (0 if none)."""
        return max(
            (tree.tree_adapter.max_depth for tree in self.ptrees), default=0)

    def _get_forest_adapter(self, model):
        return SklRandomForestAdapter(model)

    def _reflow_grid(self):
        """Lay the grid out to the view width minus the vertical scrollbar."""
        width = self.view.width() - self.view.verticalScrollBar().width()
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

    def _draw_trees(self):
        """Create a tree viewer and a grid item for every tree of the forest."""
        self.ui_size_calc_combo.setEnabled(False)
        try:
            self.grid_items, self.ptrees = [], []

            trees = self.forest_adapter.get_trees()
            node_color_func = self._type_specific('_get_node_color')
            with self.progressBar(len(trees)) as prg:
                for tree in trees:
                    ptree = PythagorasTreeViewer(
                        None, tree,
                        node_color_func=node_color_func,
                        interactive=False, padding=100)
                    self.grid_items.append(GridItem(
                        ptree, self.grid,
                        max_size=self._calculate_zoom(self.zoom)))
                    self.ptrees.append(ptree)
                    prg.advance()

            self.grid.set_items(self.grid_items)
            # This is necessary when adding items for the first time
            if self.grid:
                self._reflow_grid()
        finally:
            # Re-enable the combo even if building a tree failed; otherwise
            # the control would stay disabled for good.
            self.ui_size_calc_combo.setEnabled(True)

    @staticmethod
    def _calculate_zoom(zoom_level):
        """Calculate the max size for grid items from zoom level setting."""
        return zoom_level * 5

    def onDeleteWidget(self):
        """When deleting the widget."""
        super().onDeleteWidget()
        self.clear()

    def commit(self):
        """Commit the selected tree to output."""
        selected = self.scene.selectedItems()
        if not selected:
            self.send('Tree', None)
            # The selected tree index should only reset when model changes
            if self.model is None:
                self.selected_tree_index = -1
            return

        self.selected_tree_index = self.grid_items.index(selected[0])
        obj = self.model.trees[self.selected_tree_index]
        # Attach the data and the current display settings to the tree
        # before sending it.
        obj.instances = self.dataset
        obj.meta_target_class_index = self.target_class_index
        obj.meta_size_calc_idx = self.size_calc_idx
        obj.meta_size_log_scale = self.size_log_scale
        obj.meta_depth_limit = self.depth_limit

        self.send('Tree', obj)

    def send_report(self):
        """Send report."""
        self.report_plot()

    def _update_scene_rect(self):
        self.scene.setSceneRect(self.scene.itemsBoundingRect())

    def resizeEvent(self, ev):
        """Reflow the grid to the new view width, then defer to the base."""
        self._reflow_grid()

        super().resizeEvent(ev)

    def _type_specific(self, method):
        """A best effort method getter that somewhat separates logic specific
        to classification and regression trees.

        This relies on conventional naming of specific methods, e.g. a method
        name _get_tooltip would need to be defined like so:
        _classification_get_tooltip and _regression_get_tooltip, since they
        are both specific.

        Parameters
        ----------
        method : str
            Method name that we would like to call.

        Returns
        -------
        callable or None

        """
        if self.forest_type == self.CLASSIFICATION:
            return getattr(self, '_classification' + method)
        elif self.forest_type == self.REGRESSION:
            return getattr(self, '_regression' + method)
        else:
            return None

    # CLASSIFICATION FOREST SPECIFIC METHODS
    def _classification_update_target_class_combo(self):
        """Offer 'None' followed by the title-cased class values."""
        self._clear_target_class_combo()
        self.ui_target_class_combo.addItem('None')
        values = [c.title() for c in self.model.domain.class_vars[0].values]
        self.ui_target_class_combo.addItems(values)

    def _classification_get_color_palette(self):
        return [QColor(*c) for c in self.model.domain.class_var.colors]

    def _classification_get_node_color(self, adapter, tree_node):
        """Return the node color for the current target class selection.

        With a target class selected (index > 0; index 0 is 'None') the
        class color is lightened by that class's share of the node's
        distribution; otherwise the majority class color is used.
        """
        # this is taken almost directly from the existing classification tree
        # viewer
        colors = self.color_palette
        distribution = adapter.get_distribution(tree_node.label)[0]
        # Guard both branches against an all-zero distribution.
        total = np.sum(distribution) or 1

        if self.target_class_index:
            p = distribution[self.target_class_index - 1] / total
            color = colors[self.target_class_index - 1].lighter(200 - 100 * p)
        else:
            modus = np.argmax(distribution)
            p = distribution[modus] / total
            color = colors[int(modus)].lighter(400 - 300 * p)
        return color

    # REGRESSION FOREST SPECIFIC METHODS
    def _regression_update_target_class_combo(self):
        """Offer the names of the regression coloring methods."""
        self._clear_target_class_combo()
        self.ui_target_class_combo.addItems(
            list(zip(*self.REGRESSION_COLOR_CALC))[0])
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _regression_get_color_palette(self):
        return ContinuousPaletteGenerator(
            *self.forest_adapter.domain.class_var.colors)

    def _regression_get_node_color(self, adapter, tree_node):
        """Delegate to the coloring method selected in the target combo."""
        return self.REGRESSION_COLOR_CALC[self.target_class_index][1](
            adapter, tree_node)

    def _color_class_mean(self, adapter, tree_node):
        # calculate node colors relative to the mean of the node samples
        # NOTE(review): the ratio is undefined when all target values are
        # equal (max == min) -- confirm how the palette treats that case.
        min_mean = np.min(self.clf_dataset.Y)
        max_mean = np.max(self.clf_dataset.Y)
        instances = adapter.get_instances_in_nodes(self.clf_dataset,
                                                   tree_node.label)
        mean = np.mean(instances.Y)

        return self.color_palette[(mean - min_mean) / (max_mean - min_mean)]

    def _color_stddev(self, adapter, tree_node):
        # calculate node colors relative to the standard deviation in the node
        # samples
        # NOTE(review): the ratio is undefined when the dataset's standard
        # deviation is 0 -- confirm how the palette treats that case.
        min_mean, max_mean = 0, np.std(self.clf_dataset.Y)
        instances = adapter.get_instances_in_nodes(self.clf_dataset,
                                                   tree_node.label)
        std = np.std(instances.Y)

        return self.color_palette[(std - min_mean) / (max_mean - min_mean)]
class OWVennDiagram(widget.OWWidget):
    """Widget showing the overlap of up to five input tables as a Venn diagram.

    Elements of the diagram are either rows (matched by instance id or by
    the value of a chosen string variable) or columns (attribute names).
    The areas selected in the diagram determine the "Selected Data" output;
    the annotated output contains all data with the selection marked.
    """
    name = "Venn Diagram"
    description = "A graphical visualization of the overlap of data instances " \
                  "from a collection of input datasets."
    icon = "icons/VennDiagram.svg"
    priority = 280
    keywords = []
    settings_version = 2

    class Inputs:
        data = Input("Data", Table, multiple=True)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    class Error(widget.OWWidget.Error):
        instances_mismatch = Msg("Data sets do not contain the same instances.")
        too_many_inputs = Msg("Venn diagram accepts at most five datasets.")

    class Warning(widget.OWWidget.Warning):
        renamed_vars = Msg("Some variables have been renamed "
                           "to avoid duplicates.\n{}")

    selection: list

    settingsHandler = settings.DomainContextHandler()
    # Indices of selected disjoint areas
    selection = settings.Setting([], schema_only=True)
    #: Output unique items (one output row for every unique instance `key`)
    #: or preserve all duplicates in the output.
    output_duplicates = settings.Setting(False)
    autocommit = settings.Setting(True)
    rowwise = settings.Setting(True)
    selected_feature = settings.ContextSetting(None)

    want_control_area = False
    graph_name = "scene"
    # Parts of a domain, in the order used throughout the widget
    atr_types = ['attributes', 'metas', 'class_vars']
    # Name of the table attribute holding the values of each domain part
    atr_vals = {'metas': 'metas', 'attributes': 'X', 'class_vars': 'Y'}
    # Same as `atr_vals`, but for a single row instance
    row_vals = {'attributes': 'x', 'class_vars': 'y', 'metas': 'metas'}

    def __init__(self):
        super().__init__()

        # Diagram update is in progress
        self._updating = False
        # Input update is in progress
        self._inputUpdate = False
        # Input datasets in the order they were 'connected'.
        self.data = {}
        # Extracted input item sets in the order they were 'connected'
        self.itemsets = {}
        # A list with 2 ** len(self.data) elements that store item sets
        # belonging to each area
        self.disjoint = []
        # A list with 2 ** len(self.data) elements that store keys of tables
        # intersected in each area
        self.area_keys = []
        # Items in the currently selected areas; recomputed by `commit`.
        # Initialised here so that helpers reading it never hit a missing
        # attribute.
        self.selected_items = set()

        # Main area view
        self.scene = QGraphicsScene(self)
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing)
        self.view.setBackgroundRole(QPalette.Window)
        self.view.setFrameStyle(QGraphicsView.StyledPanel)

        self.mainArea.layout().addWidget(self.view)
        self.vennwidget = VennDiagram()
        self._resize()
        self.vennwidget.itemTextEdited.connect(self._on_itemTextEdited)
        self.scene.selectionChanged.connect(self._on_selectionChanged)

        self.scene.addItem(self.vennwidget)

        controls = gui.hBox(self.mainArea)
        box = gui.radioButtonsInBox(
            controls, self, 'rowwise',
            ["Columns (features)", "Rows (instances), matched by", ],
            box="Elements", callback=self._on_matching_changed
        )
        gui.comboBox(
            gui.indentedBox(box), self, "selected_feature",
            model=itemmodels.VariableListModel(placeholder="Instance identity"),
            callback=self._on_inputAttrActivated
        )
        box.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)

        self.outputs_box = box = gui.vBox(controls, "Output")
        # The lambda looks `self.commit` up at call time rather than binding
        # it now.
        self.output_duplicates_cb = gui.checkBox(
            box, self, "output_duplicates", "Output duplicates",
            callback=lambda: self.commit())  # pylint: disable=unnecessary-lambda
        gui.auto_send(box, self, "autocommit", box=False)
        self.output_duplicates_cb.setEnabled(bool(self.rowwise))
        self._queue = []

    def resizeEvent(self, event):
        """Resize the diagram together with the widget."""
        super().resizeEvent(event)
        self._resize()

    def showEvent(self, event):
        """Fit the diagram to the view when the widget is shown."""
        super().showEvent(event)
        self._resize()

    def _resize(self):
        """Resize the Venn widget to fit the view and update the scene rect."""
        # vennwidget draws so that the diagram fits into its geometry,
        # while labels take further 120 pixels, hence -120 in below formula
        size = max(200, min(self.view.width(), self.view.height()) - 120)
        self.vennwidget.resize(size, size)
        self.scene.setSceneRect(self.scene.itemsBoundingRect())

    @Inputs.data
    @check_sql_input
    def setData(self, data, key=None):
        """Add, replace or remove the input table stored under `key`.

        Passing `data=None` for a known key removes that input. A sixth
        distinct input is rejected with the `too_many_inputs` error.
        """
        self.Error.too_many_inputs.clear()
        if not self._inputUpdate:
            self._inputUpdate = True

        if key in self.data:
            if data is None:
                # Remove the input
                # Clear possible warnings.
                self.Warning.clear()
                del self.data[key]
            else:
                # Update existing item
                self.data[key] = self.data[key]._replace(name=data.name,
                                                         table=data)

        elif data is not None:
            # TODO: Allow setting more than 5 inputs and let the user
            # select the 5 to display.
            if len(self.data) == 5:
                self.Error.too_many_inputs()
                return
            # Add a new input
            self.data[key] = _InputData(key, data.name, data)
        self._setInterAttributes()

    def data_equality(self):
        """
        Checks if all input datasets have same ids.
        """
        if not self.data.values():
            return True
        sets = [set(val.table.ids) for val in self.data.values()]
        inter = reduce(set.intersection, sets)
        # All sets are equal exactly when the intersection is as large as
        # the largest of them.
        return len(inter) == max(map(len, sets))

    def settings_compatible(self):
        """Return whether the inputs can be shown with the current settings.

        Column-wise comparison requires all inputs to contain the same
        instances; when they do not, the diagram and item sets are cleared
        and the `instances_mismatch` error is shown.
        """
        self.Error.instances_mismatch.clear()
        if not self.rowwise:
            if not self.data_equality():
                self.vennwidget.clear()
                self.Error.instances_mismatch()
                self.itemsets = {}
                return False
        return True

    def handleNewSignals(self):
        """Rebuild the item sets and the diagram after the inputs changed."""
        self._inputUpdate = False
        self.vennwidget.clear()
        if not self.settings_compatible():
            self.invalidateOutput()
            return

        self._createItemsets()
        self._createDiagram()
        # If autocommit is enabled, _createDiagram already outputs data
        # If not, call unconditional_commit from here
        if not self.autocommit:
            self.unconditional_commit()

        self._updateInfo()
        super().handleNewSignals()

    def intersectionStringAttrs(self):
        """Return the set of string attributes common to all input tables."""
        sets = [set(string_attributes(data_.table.domain))
                for data_ in self.data.values()]
        if sets:
            return reduce(set.intersection, sets)
        return set()

    def _setInterAttributes(self):
        """Refill the matching-feature combo with the shared string attributes.

        The current choice is reset to the first model entry (the `None`
        placeholder) if it is no longer available.
        """
        model = self.controls.selected_feature.model()
        model[:] = [None] + list(self.intersectionStringAttrs())
        if self.selected_feature:
            names = (var.name for var in model if var)
            if self.selected_feature.name not in names:
                self.selected_feature = model[0]

    def _itemsForInput(self, key):
        """
        Calculates input for venn diagram, according to user's settings.
        """
        table = self.data[key].table
        attr = self.selected_feature
        if attr:
            return [str(inst[attr]) for inst in table
                    if not np.isnan(inst[attr])]
        else:
            return list(table.ids)

    def _createItemsets(self):
        """
        Create itemsets over rows or columns (domains) of input tables.
        """
        olditemsets = dict(self.itemsets)
        self.itemsets.clear()

        for key, input_ in self.data.items():
            if self.rowwise:
                items = self._itemsForInput(key)
            else:
                items = [el.name for el in input_.table.domain.attributes]
            name = input_.name
            if key in olditemsets and olditemsets[key].name == name:
                # Reuse the title (which might have been changed by the user)
                title = olditemsets[key].title
            else:
                title = name

            itemset = _ItemSet(key=key, name=name, title=title, items=items)
            self.itemsets[key] = itemset

    def _createDiagram(self):
        """Rebuild the Venn items and areas from the current item sets.

        Areas whose index is in the stored selection are re-selected.
        Selection-change handling is suppressed while the diagram is being
        built and is triggered once at the end.
        """
        self._updating = True
        try:
            oldselection = list(self.selection)

            n = len(self.itemsets)
            self.disjoint, self.area_keys = \
                self.get_disjoint(set(s.items) for s in self.itemsets.values())

            vennitems = []
            colors = colorpalettes.LimitedDiscretePalette(n, force_hsv=True)

            for i, item in enumerate(self.itemsets.values()):
                cnt = len(set(item.items))
                cnt_all = len(item.items)
                if cnt != cnt_all:
                    fmt = '{} <i>(all: {})</i>'
                else:
                    fmt = '{}'
                counts = fmt.format(cnt, cnt_all)
                gr = VennSetItem(text=item.title, informativeText=counts)
                color = colors[i]
                color.setAlpha(100)
                gr.setBrush(QBrush(color))
                gr.setPen(QPen(Qt.NoPen))
                vennitems.append(gr)

            self.vennwidget.setItems(vennitems)

            for i, area in enumerate(self.vennwidget.vennareas()):
                area_items = list(map(str, list(self.disjoint[i])))
                if i:
                    area.setText("{0}".format(len(area_items)))

                label = disjoint_set_label(i, n, simplify=False)
                head = "<h4>|{}| = {}</h4>".format(label, len(area_items))
                if len(area_items) > 32:
                    # Only the first 32 items are listed in the tooltip
                    items_str = ", ".join(map(escape, area_items[:32]))
                    hidden = len(area_items) - 32
                    tooltip = ("{}<span>{}, ...<br/>({} items not shown)</span>"
                               .format(head, items_str, hidden))
                elif area_items:
                    tooltip = "{}<span>{}</span>".format(
                        head,
                        ", ".join(map(escape, area_items))
                    )
                else:
                    tooltip = head

                area.setToolTip(tooltip)

                area.setPen(QPen(QColor(10, 10, 10, 200), 1.5))
                area.setFlag(QGraphicsPathItem.ItemIsSelectable, True)
                area.setSelected(i in oldselection)
        finally:
            # Never leave the flag set: a stuck flag would make
            # _on_selectionChanged ignore all further selection changes.
            self._updating = False
        self._on_selectionChanged()

    def _updateInfo(self):
        """Warn about inputs that have no suitable identifiers."""
        # Clear all warnings
        self.warning()
        if self.selected_feature is None:
            no_idx = ["#{}".format(i + 1)
                      for i, key in enumerate(self.data)
                      if not source_attributes(self.data[key].table.domain)]
            if len(no_idx) == 1:
                self.warning("Dataset {} has no suitable identifiers."
                             .format(no_idx[0]))
            elif len(no_idx) > 1:
                self.warning("Datasets {} and {} have no suitable identifiers."
                             .format(", ".join(no_idx[:-1]), no_idx[-1]))

    def _on_selectionChanged(self):
        """Store the indices of the selected areas and update the output."""
        if self._updating:
            return

        areas = self.vennwidget.vennareas()
        self.selection = [i for i, area in enumerate(areas)
                          if area.isSelected()]
        self.invalidateOutput()

    def _on_matching_changed(self):
        """Rebuild the diagram after the element type or matching changed."""
        self.output_duplicates_cb.setEnabled(bool(self.rowwise))
        if not self.settings_compatible():
            self.invalidateOutput()
            return
        self._createItemsets()
        self._createDiagram()
        self._updateInfo()

    def _on_inputAttrActivated(self):
        """Switch to row-wise matching when a matching feature is chosen."""
        self.rowwise = 1
        self._on_matching_changed()

    def _on_itemTextEdited(self, index, text):
        """Store the title the user typed for the `index`-th item set."""
        text = str(text)
        key = list(self.itemsets)[index]
        self.itemsets[key] = self.itemsets[key]._replace(title=text)

    def invalidateOutput(self):
        """Request that the outputs are recomputed."""
        self.commit()

    def merge_data(self, domain, values, ids=None):
        """Build a table from per-part variable lists and value blocks.

        Parameters
        ----------
        domain : dict
            Maps each domain part to a list of variables. Variables whose
            names clash are replaced in place by uniquely named copies.
        values : dict
            Maps a domain part to a list of arrays that are stacked
            horizontally. Parts that are absent contribute no columns.
        ids : array-like, optional
            Row ids assigned to the resulting table.

        Returns
        -------
        Table
        """
        X, metas, class_vars = None, None, None
        renamed = []
        for variables in domain.values():
            names = [var.name for var in variables]
            unique_names = get_unique_names_duplicates(names)
            for idx, (old_name, new_name) in enumerate(zip(names, unique_names)):
                if old_name != new_name:
                    variables[idx] = variables[idx].copy(name=new_name)
                    renamed.append(old_name)
        if renamed:
            self.Warning.renamed_vars(', '.join(renamed))

        # Number of rows, needed only when there are no attribute columns.
        n_rows = None
        if 'attributes' in values:
            X = np.hstack(values['attributes'])
        if 'metas' in values:
            metas = np.hstack(values['metas'])
            n_rows = len(metas)
        if 'class_vars' in values:
            class_vars = np.hstack(values['class_vars'])
            n_rows = len(class_vars)
        if X is None:
            if n_rows is None:
                # No columns of any kind: take the row count from the ids
                # if given, otherwise produce an empty table.
                n_rows = len(ids) if ids is not None else 0
            X = np.empty((n_rows, 0))
        table = Table.from_numpy(Domain(**domain), X, class_vars, metas)
        if ids is not None:
            table.ids = ids
        return table

    def extract_columnwise(self, var_dict, columns=None):
        """Assemble the output table for column-wise comparison.

        `var_dict` maps a domain part to a dict of
        `variable -> [is_different, [(variable, table_key), ...]]`.
        Variables marked as different are copied once per source table and
        suffixed with the 1-based position of that input; others are copied
        once from their first source. When `columns` is given, attributes
        get a 'Selected' flag telling whether their name is in `columns`.
        """
        domain = {type_: [] for type_ in self.atr_types}
        values = defaultdict(list)
        renamed = []
        # 1-based position of each input, used to mark renamed columns
        positions = {key: i + 1 for i, key in enumerate(self.data)}
        for atr_type, vars_dict in var_dict.items():
            mark_selected = columns and atr_type == 'attributes'
            values_attr = self.atr_vals[atr_type]
            for var_name, var_data in vars_dict.items():
                is_selected = bool(columns) and var_name.name in columns
                if var_data[0]:
                    # columns are different, copy all, rename them
                    for var, table_key in var_data[1]:
                        idx = positions[table_key]
                        new_atr = var.copy(name=f'{var_name.name} ({idx})')
                        if mark_selected:
                            new_atr.attributes['Selected'] = is_selected
                        domain[atr_type].append(new_atr)
                        renamed.append(var_name.name)
                        column = self.data[table_key].table[:, var_name]
                        values[atr_type].append(
                            getattr(column, values_attr).reshape(-1, 1))
                else:
                    source_var, source_key = var_data[1][0]
                    new_atr = source_var.copy()
                    if mark_selected:
                        new_atr.attributes['Selected'] = is_selected
                    domain[atr_type].append(new_atr)
                    column = self.data[source_key].table[:, var_name]
                    values[atr_type].append(
                        getattr(column, values_attr).reshape(-1, 1))
        if renamed:
            self.Warning.renamed_vars(', '.join(renamed))
        return self.merge_data(domain, values)

    def curry_merge(self, table_key, atr_type, ids=None, selection=False):
        """Return a reducer that merges variables of one table into a dict.

        The returned function is meant to be used with `reduce` over the
        variables of the table stored under `table_key`.
        """
        if self.rowwise:
            check_equality = self.arrays_equal_rows
        else:
            check_equality = self.arrays_equal_cols

        def inner(new_atrs, atr):
            """
            Atrs - list of variables we wish to merge
            new_atrs - dictionary where key is old var, val
                is [is_different:bool, table_keys:list]), is_different is set
                to True, if we are outputing duplicates, but the value is
                arbitrary
            """
            if atr in new_atrs:
                if not selection and self.output_duplicates:
                    # if output_duplicates, we just check if compute value
                    # is the same
                    new_atrs[atr][0] = True
                elif not new_atrs[atr][0]:
                    for var, key in new_atrs[atr][1]:
                        if not check_equality(table_key,
                                              key,
                                              atr.name,
                                              self.atr_vals[atr_type],
                                              type(var), ids):
                            new_atrs[atr][0] = True
                            break
                new_atrs[atr][1].append((atr, table_key))
            else:
                new_atrs[atr] = [False, [(atr, table_key)]]
            return new_atrs
        return inner

    def arrays_equal_rows(self, key1, key2, name, data_type, type_, ids):
        """Compare column `name` of two tables on the items they share.

        `ids` maps each table key to a dict of item -> row index.
        """
        # gets masks, compares same as cols
        t1 = self.data[key1].table
        t2 = self.data[key2].table
        inter_val = set(ids[key1]) & set(ids[key2])
        t1_inter = [ids[key1][val] for val in inter_val]
        t2_inter = [ids[key2][val] for val in inter_val]
        return arrays_equal(
            getattr(t1[t1_inter, name],
                    data_type).reshape(-1, 1),
            getattr(t2[t2_inter, name],
                    data_type).reshape(-1, 1),
            type_)

    def arrays_equal_cols(self, key1, key2, name, data_type, type_, _ids=None):
        """Compare the whole column `name` of two tables."""
        return arrays_equal(
            getattr(self.data[key1].table[:, name],
                    data_type),
            getattr(self.data[key2].table[:, name],
                    data_type),
            type_)

    def create_from_columns(self, columns, relevant_keys, get_selected):
        """
        Columns are duplicated only if values differ (even
        if only in order of values), origin table name and input slot is
        added to column name.

        With `get_selected` only attributes named in `columns` are kept;
        otherwise all attributes are kept and flagged as selected or not.
        """
        var_dict = {}
        for atr_type in self.atr_types:
            container = {}
            for table_key in relevant_keys:
                table = self.data[table_key].table
                if atr_type == 'attributes' and get_selected:
                    atrs = [c for c in table.domain.attributes
                            if c.name in columns]
                else:
                    atrs = getattr(table.domain, atr_type)
                merge_vars = self.curry_merge(table_key, atr_type)
                container = reduce(merge_vars, atrs, container)
            var_dict[atr_type] = container

        if get_selected:
            annotated = self.extract_columnwise(var_dict, None)
        else:
            annotated = self.extract_columnwise(var_dict, columns)

        return annotated

    def extract_rowwise(self, var_dict, ids=None, selection=False):
        """
        keys : ['attributes', 'metas', 'class_vars']
        vals: new_atrs - dictionary where key is old name, val
            is [is_different:bool, table_keys:list])
        ids: dict with ids for each table (required despite the default)

        With `selection` set, the result is an annotated table in which
        rows belonging to the selected items are marked.
        """
        all_ids = sorted(reduce(set.union,
                                [set(val) for val in ids.values()], set()))

        permutations = {}
        for table_key, dict_ in ids.items():
            permutations[table_key] = get_perm(list(dict_), all_ids)

        domain = {type_: [] for type_ in self.atr_types}
        values = defaultdict(list)
        renamed = []
        # 1-based position of each input, used to mark renamed columns
        positions = {key: i + 1 for i, key in enumerate(self.data)}
        for atr_type, vars_dict in var_dict.items():
            values_attr = self.atr_vals[atr_type]
            for var_name, var_data in vars_dict.items():
                different = var_data[0]
                if different:
                    # Columns are different, copy and rename them.
                    # Renaming is done here to mark appropriately the source
                    # table. Additional strange clashes are checked later in
                    # merge_data
                    for var, table_key in var_data[1]:
                        temp = self.data[table_key].table
                        idx = positions[table_key]
                        domain[atr_type].append(
                            var.copy(name='{} ({})'.format(var_name, idx)))
                        renamed.append(var_name.name)
                        v = getattr(temp[list(ids[table_key].values()), var_name],
                                    values_attr)
                        perm = permutations[table_key]
                        if len(v) < len(all_ids):
                            values[atr_type].append(
                                pad_columns(v, perm, len(all_ids)))
                        else:
                            values[atr_type].append(v[perm].reshape(-1, 1))
                else:
                    value = np.full((len(all_ids), 1), np.nan)
                    domain[atr_type].append(var_data[1][0][0].copy())
                    for _, table_key in var_data[1]:
                        # different tables have different part of the same
                        # attribute vector
                        perm = permutations[table_key]
                        rows = list(ids[table_key].values())
                        v = getattr(self.data[table_key].table[rows, var_name],
                                    values_attr).reshape(-1, 1)
                        value = value.astype(v.dtype, copy=False)
                        value[perm] = v
                    values[atr_type].append(value)

        if renamed:
            self.Warning.renamed_vars(', '.join(renamed))
        ids = None if self.selected_feature else np.array(all_ids)
        table = self.merge_data(domain, values, ids)
        if selection:
            mask = [idx in self.selected_items for idx in all_ids]
            return create_annotated_table(table, mask)
        return table

    def get_indices(self, table, selection):
        """Returns mappings of ids (be it row id or string) to indices in tables"""
        if self.selected_feature:
            if self.output_duplicates and selection:
                # Keep every row of each distinct value
                items, inverse = np.unique(
                    getattr(table[:, self.selected_feature], 'metas'),
                    return_inverse=True)
                ids = [np.nonzero(inverse == idx)[0]
                       for idx in range(len(items))]
            else:
                # Keep the first row of each distinct value
                items, ids = np.unique(
                    getattr(table[:, self.selected_feature], 'metas'),
                    return_index=True)
        else:
            items = table.ids
            ids = range(len(table))

        if selection:
            return {item: idx for item, idx in zip(items, ids)
                    if item in self.selected_items}

        return dict(zip(items, ids))

    def get_indices_to_match_by(self, relevant_keys, selection=False):
        """Return `get_indices` of each table in `relevant_keys`, by key."""
        dict_ = {}
        for key in relevant_keys:
            table = self.data[key].table
            dict_[key] = self.get_indices(table, selection)
        return dict_

    def create_from_rows(self, relevant_ids, selection=False):
        """Build the row-wise output table for the tables in `relevant_ids`.

        `relevant_ids` maps table keys to item -> row index mappings. When
        duplicates are requested and this is not the annotated output, rows
        are not merged across tables.
        """
        var_dict = {}
        for atr_type in self.atr_types:
            container = {}
            for table_key in relevant_ids:
                merge_vars = self.curry_merge(
                    table_key, atr_type, relevant_ids, selection)
                atrs = getattr(self.data[table_key].table.domain, atr_type)
                container = reduce(merge_vars, atrs, container)
            var_dict[atr_type] = container
        if self.output_duplicates and not selection:
            return self.extract_rowwise_duplicates(var_dict, relevant_ids)
        return self.extract_rowwise(var_dict, relevant_ids, selection)

    def expand_table(self, table, atrs, metas, cv):
        """Spread the values of `table` over the given variable lists.

        `table` may be a table or a single row instance. Columns for
        variables that `table` does not have are filled with NaN.

        Returns a tuple `(attributes, metas, class_vars, ids)` of arrays.
        """
        exp = []
        if isinstance(table, RowInstance):
            n = 1
            ids = table.id.reshape(-1, 1)
            atr_vals = self.row_vals
        else:
            n = len(table)
            ids = table.ids.reshape(-1, 1)
            atr_vals = self.atr_vals
        for all_el, atr_type in zip([atrs, metas, cv], self.atr_types):
            cur_el = getattr(table.domain, atr_type)
            array = np.full((n, len(all_el)), np.nan)
            if cur_el:
                perm = get_perm(cur_el, all_el)
                b = getattr(table, atr_vals[atr_type]).reshape(len(array),
                                                                len(perm))
                array = array.astype(b.dtype, copy=False)
                array[:, perm] = b
            exp.append(array)
        return (*exp, ids)

    def extract_rowwise_duplicates(self, var_dict, ids):
        """Build the row-wise output keeping one row per source table row.

        Rows are ordered by item and, within an item, by input table.
        """
        all_ids = sorted(reduce(set.union,
                                [set(val) for val in ids.values()], set()))
        sort_key = attrgetter("name")
        all_atrs = sorted(var_dict['attributes'], key=sort_key)
        all_metas = sorted(var_dict['metas'], key=sort_key)
        all_cv = sorted(var_dict['class_vars'], key=sort_key)

        all_x, all_y, all_m = [], [], []
        new_table_ids = []
        for idx in all_ids:
            # iterate through tables with same idx
            for table_key, t_indices in ids.items():
                if idx not in t_indices:
                    continue
                map_ = t_indices[idx]
                extracted = self.data[table_key].table[map_]
                # pylint: disable=unbalanced-tuple-unpacking
                x, m, y, t_ids = self.expand_table(
                    extracted, all_atrs, all_metas, all_cv)
                all_x.append(x)
                all_y.append(y)
                all_m.append(m)
                new_table_ids.append(t_ids)
        domain = {'attributes': all_atrs,
                  'metas': all_metas,
                  'class_vars': all_cv}
        values = {'attributes': [np.vstack(all_x)],
                  'metas': [np.vstack(all_m)],
                  'class_vars': [np.vstack(all_y)]}
        return self.merge_data(domain, values, np.vstack(new_table_ids))

    def commit(self):
        """Compute and send the selected and the annotated output tables."""
        if not self.vennwidget.vennareas() or not self.data:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            return

        self.selected_items = reduce(
            set.union, [self.disjoint[index] for index in self.selection],
            set()
        )
        selected_keys = reduce(
            set.union, [set(self.area_keys[area]) for area in self.selection],
            set())
        selected = None

        if self.rowwise:
            if self.selected_items:
                selected_ids = self.get_indices_to_match_by(
                    selected_keys, bool(self.selection))
                selected = self.create_from_rows(selected_ids, False)
            annotated_ids = self.get_indices_to_match_by(self.data)
            annotated = self.create_from_rows(annotated_ids, True)
        else:
            annotated = self.create_from_columns(
                self.selected_items, self.data, False)
            if self.selected_items:
                selected = self.create_from_columns(
                    self.selected_items, selected_keys, True)

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)

    def send_report(self):
        """Add the diagram to the report."""
        self.report_plot()

    def get_disjoint(self, sets):
        """
        Return all disjoint subsets.

        Returns a pair of lists with 2 ** n entries each (n being the
        number of sets): the items of every area and the keys of the input
        tables that the area belongs to.
        """
        sets = list(sets)
        n = len(sets)
        disjoint_sets = [None] * (2 ** n)
        included_tables = [None] * (2 ** n)
        for i in range(2 ** n):
            key = setkey(i, n)
            included = [s for s, inc in zip(sets, key) if inc]
            if included:
                excluded = [s for s, inc in zip(sets, key) if not inc]
                s = reduce(set.intersection, included)
                s = reduce(set.difference, excluded, s)
            else:
                s = set()
            disjoint_sets[i] = s
            included_tables[i] = [k for k, inc in zip(self.data, key) if inc]

        return disjoint_sets, included_tables
class OWPythagoreanForest(OWWidget):
    """Widget displaying every tree of a random forest as a Pythagorean tree.

    The trees are laid out in a grid. Selecting a tree sends the
    corresponding tree model to the 'Tree' output.
    """
    name = 'Pythagorean Forest'
    description = 'Pythagorean forest for visualising random forests.'
    icon = 'icons/PythagoreanForest.svg'

    priority = 1001

    inputs = [('Random forest', RandomForestModel, 'set_rf')]
    outputs = [('Tree', TreeModel)]

    # Enable the save as feature
    graph_name = 'scene'

    # Settings
    depth_limit = settings.ContextSetting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    zoom = settings.Setting(50)
    selected_tree_index = settings.ContextSetting(-1)

    def __init__(self):
        super().__init__()
        self.model = None
        self.forest_adapter = None
        self.instances = None
        self.clf_dataset = None
        # We need to store references to the trees and grid items
        self.grid_items, self.ptrees = [], []
        # In some rare cases, we need to prevent committing, the only one
        # that this currently helps is that when changing the size calculation
        # the trees are all recomputed, but we don't want to output a new tree
        # to keep things consistent with other ui controls.
        self.__prevent_commit = False

        self.color_palette = None

        # Different methods to calculate the size of squares
        self.SIZE_CALCULATION = [
            ('Normal', lambda x: x),
            ('Square root', lambda x: sqrt(x)),
            ('Logarithmic', lambda x: log(x + 1)),
        ]

        # CONTROL AREA
        # Tree info area
        box_info = gui.widgetBox(self.controlArea, 'Forest')
        self.ui_info = gui.widgetLabel(box_info)

        # Display controls area
        box_display = gui.widgetBox(self.controlArea, 'Display')
        self.ui_depth_slider = gui.hSlider(
            box_display, self, 'depth_limit', label='Depth', ticks=False,
            callback=self.update_depth)
        self.ui_target_class_combo = gui.comboBox(
            box_display, self, 'target_class_index', label='Target class',
            orientation=Qt.Horizontal, items=[], contentsLength=8,
            callback=self.update_colors)
        self.ui_size_calc_combo = gui.comboBox(
            box_display, self, 'size_calc_idx', label='Size',
            orientation=Qt.Horizontal,
            items=list(zip(*self.SIZE_CALCULATION))[0], contentsLength=8,
            callback=self.update_size_calc)
        self.ui_zoom_slider = gui.hSlider(
            box_display, self, 'zoom', label='Zoom', ticks=False, minValue=20,
            maxValue=150, callback=self.zoom_changed, createLabel=False)

        # Stretch to fit the rest of the unused area
        gui.rubber(self.controlArea)

        self.controlArea.setSizePolicy(QSizePolicy.Preferred,
                                       QSizePolicy.Expanding)

        # MAIN AREA
        self.scene = QGraphicsScene(self)
        self.scene.selectionChanged.connect(self.commit)
        self.grid = OWGrid()
        self.grid.geometryChanged.connect(self._update_scene_rect)
        self.scene.addItem(self.grid)

        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOn)
        self.mainArea.layout().addWidget(self.view)

        self.resize(800, 500)

        self.clear()

    def set_rf(self, model=None):
        """When a different forest is given."""
        self.clear()
        self.model = model

        if model is not None:
            self.forest_adapter = self._get_forest_adapter(self.model)
            self._draw_trees()
            # NOTE(review): this stores the first tree adapter, not a
            # palette, in `color_palette` -- kept as in the original;
            # confirm the intent.
            self.color_palette = self.forest_adapter.get_trees()[0]
            self.instances = model.instances
            # this bit is important for the regression classifier
            if self.instances is not None and \
                    self.instances.domain != model.domain:
                self.clf_dataset = self.instances.transform(self.model.domain)
            else:
                self.clf_dataset = self.instances

            self._update_info_box()
            self._update_target_class_combo()
            self._update_depth_slider()

            self.selected_tree_index = -1

    def clear(self):
        """Clear all relevant data from the widget."""
        self.model = None
        self.forest_adapter = None
        # Drop data that belongs to the previous forest as well, so that
        # nothing stale is kept once the input is removed.
        self.instances = None
        self.clf_dataset = None
        self.color_palette = None
        self.ptrees = []
        self.grid_items = []
        self.grid.clear()

        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()

    def update_depth(self):
        """When the max depth slider is changed."""
        for tree in self.ptrees:
            tree.set_depth_limit(self.depth_limit)

    def update_colors(self):
        """When the target class or coloring method is changed."""
        for tree in self.ptrees:
            tree.target_class_changed(self.target_class_index)

    def update_size_calc(self):
        """When the size calculation of the trees is changed."""
        if self.model is not None:
            # Redrawing changes the scene selection; do not output a tree
            # while that happens.
            with self._prevent_commit():
                self.grid.clear()
                self._draw_trees()
                # Keep the selected item
                if self.selected_tree_index != -1:
                    self.grid_items[self.selected_tree_index].setSelected(True)
                self.update_depth()

    def zoom_changed(self):
        """When we update the "Zoom" slider."""
        max_size = self._calculate_zoom(self.zoom)
        for item in self.grid_items:
            item.set_max_size(max_size)

        self._reflow_grid()

    def _reflow_grid(self):
        """Lay the grid out to the view width without the vertical scrollbar."""
        width = (self.view.width() - self.view.verticalScrollBar().width())
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

    @contextmanager
    def _prevent_commit(self):
        """Context manager that suppresses `commit` inside its body."""
        try:
            self.__prevent_commit = True
            yield
        finally:
            self.__prevent_commit = False

    def _update_info_box(self):
        """Show the number of trees in the forest."""
        self.ui_info.setText(
            'Trees: {}'.format(len(self.forest_adapter.get_trees())))

    def _update_depth_slider(self):
        """Enable the depth slider and set it to the deepest tree's depth."""
        self.depth_limit = self._get_max_depth()

        self.ui_depth_slider.parent().setEnabled(True)
        self.ui_depth_slider.setMaximum(self.depth_limit)
        self.ui_depth_slider.setValue(self.depth_limit)

    def _clear_info_box(self):
        """Reset the info label to the 'no input' message."""
        self.ui_info.setText('No forest on input.')

    def _clear_target_class_combo(self):
        """Empty the target class combo and reset its index to 0."""
        self.ui_target_class_combo.clear()
        self.target_class_index = 0
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _clear_depth_slider(self):
        """Disable the depth slider and set its maximum to 0."""
        self.ui_depth_slider.parent().setEnabled(False)
        self.ui_depth_slider.setMaximum(0)

    def _get_max_depth(self):
        """Return the largest `max_depth` among the drawn trees."""
        return max(tree.tree_adapter.max_depth for tree in self.ptrees)

    def _get_forest_adapter(self, model):
        """Return the adapter used to read trees from `model`."""
        return SklRandomForestAdapter(model)

    @contextmanager
    def disable_ui(self):
        """Temporarily disable the UI while trees may be redrawn."""
        controls = (self.ui_size_calc_combo, self.ui_depth_slider,
                    self.ui_target_class_combo, self.ui_zoom_slider)
        try:
            for control in controls:
                control.setEnabled(False)
            yield
        finally:
            for control in controls:
                control.setEnabled(True)

    def _draw_trees(self):
        """Create a tree viewer and a grid item for every tree in the forest."""
        self.grid_items, self.ptrees = [], []

        num_trees = len(self.forest_adapter.get_trees())
        with self.progressBar(num_trees) as prg, self.disable_ui():
            for tree in self.forest_adapter.get_trees():
                ptree = PythagorasTreeViewer(
                    None, tree,
                    interactive=False, padding=100,
                    target_class_index=self.target_class_index,
                    weight_adjustment=self.SIZE_CALCULATION[self.size_calc_idx][1]
                )
                grid_item = GridItem(
                    ptree, self.grid, max_size=self._calculate_zoom(self.zoom)
                )
                # We don't want to show flickering while the trees are being
                # drawn, so the items stay hidden until all of them are ready
                grid_item.setVisible(False)

                self.grid_items.append(grid_item)
                self.ptrees.append(ptree)
                prg.advance()

            self.grid.set_items(self.grid_items)
            # This is necessary when adding items for the first time
            if self.grid:
                self._reflow_grid()
            # After drawing is complete, we show the trees
            for grid_item in self.grid_items:
                grid_item.setVisible(True)

    @staticmethod
    def _calculate_zoom(zoom_level):
        """Calculate the max size for grid items from zoom level setting."""
        return zoom_level * 5

    def onDeleteWidget(self):
        """When deleting the widget."""
        super().onDeleteWidget()
        self.clear()

    def commit(self):
        """Commit the selected tree to output."""
        if self.__prevent_commit:
            return

        selected_items = self.scene.selectedItems()
        if not selected_items:
            self.send('Tree', None)
            # The selected tree index should only reset when model changes
            if self.model is None:
                self.selected_tree_index = -1
            return

        selected_item = selected_items[0]
        self.selected_tree_index = self.grid_items.index(selected_item)
        tree = self.model.trees[self.selected_tree_index]
        # Pass the data and the current display settings along with the tree
        tree.instances = self.instances
        tree.meta_target_class_index = self.target_class_index
        tree.meta_size_calc_idx = self.size_calc_idx
        tree.meta_depth_limit = self.depth_limit

        self.send('Tree', tree)

    def send_report(self):
        """Send report."""
        self.report_plot()

    def _update_scene_rect(self):
        """Fit the scene rectangle to the bounding rectangle of its items."""
        self.scene.setSceneRect(self.scene.itemsBoundingRect())

    def _update_target_class_combo(self):
        """Fill the colouring combo according to the type of the class.

        For a discrete class the combo lists 'None' followed by the class
        values; otherwise it lists the available node colouring methods.
        """
        self._clear_target_class_combo()
        label = [x for x in self.ui_target_class_combo.parent().children()
                 if isinstance(x, QLabel)][0]

        # The model may come without training instances; its own domain
        # describes the class variable just as well.
        if self.instances is not None:
            domain = self.instances.domain
        else:
            domain = self.model.domain

        if domain.has_discrete_class:
            label_text = 'Target class'
            values = [c.title() for c in domain.class_vars[0].values]
            values.insert(0, 'None')
        else:
            label_text = 'Node color'
            values = list(ContinuousTreeNode.COLOR_METHODS.keys())
        label.setText(label_text)
        self.ui_target_class_combo.addItems(values)
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def resizeEvent(self, ev):
        """Reflow the grid to the new width when the widget is resized."""
        self._reflow_grid()

        super().resizeEvent(ev)