def _group_cols(self, data, group, attr):
    """Split the values of an attribute by the values of a group variable.

    Parameters
    ----------
    data
        Object exposing ``get_column_view(variable)``, whose first returned
        element is the column's values.
    group
        Discrete grouping variable; ``len(group.values)`` gives the number
        of groups and the group column holds the group indices.
    attr
        Either an ``np.ndarray`` with the attribute's values (used as is),
        or a variable whose column is read from `data`.

    Returns
    -------
    list of np.ndarray
        One array per group value, in index order, with NaNs removed.
    """
    if isinstance(attr, np.ndarray):
        attr_col = attr
    else:
        # Read the column of `attr`; the previous code read the `group`
        # column here, so the attribute itself was never looked at.
        attr_col = data.get_column_view(attr)[0].astype(float)
    group_col = data.get_column_view(group)[0].astype(float)
    groups = [attr_col[group_col == i] for i in range(len(group.values))]
    # Missing attribute values must not take part in the statistics.
    return [col[~np.isnan(col)] for col in groups]
def compute_score(attr):
    """Return a sort key for `attr`: a p-value, or a sentinel.

    The grouping variable itself scores 3. Continuous attributes are scored
    with a one-way ANOVA over the groups, other attributes with a chi-square
    test on the contingency table. Whenever no valid p-value can be computed
    (fewer than two non-empty groups, empty value lists, degenerate table,
    NaN result) the score is 2.

    Relies on `data`, `group_var`, `group_col` and `n_groups` from the
    enclosing scope.
    """
    if attr is group_var:
        return 3

    if attr.is_continuous:
        # One-way ANOVA
        values = data.get_column_view(attr)[0].astype(float)
        samples = []
        for index in range(n_groups):
            sample = values[group_col == index]
            sample = sample[~np.isnan(sample)]
            if len(sample):
                samples.append(sample)
        if len(samples) > 1:
            p = f_oneway(*samples)[1]
        else:
            p = 2
    else:
        # Chi-square with the given distribution into groups
        # (see degrees of freedom in computation of the p-value)
        if not (attr.values and group_var.values):
            return 2
        observed = np.array(
            contingency.get_contingency(data, group_var, attr))
        # Drop empty rows first, then empty columns of what is left.
        nonempty_rows = observed.sum(axis=1) != 0
        observed = observed[nonempty_rows, :]
        nonempty_cols = observed.sum(axis=0) != 0
        observed = observed[:, nonempty_cols]
        if min(observed.shape) < 2:
            return 2
        row_totals = observed.sum(axis=1)
        col_totals = observed.sum(axis=0)
        expected = np.outer(row_totals, col_totals) / np.sum(observed)
        p = chisquare(observed.ravel(), f_exp=expected.ravel(),
                      ddof=n_groups - 1)[1]

    return 2 if math.isnan(p) else p
def apply_group_sorting(self):
    """Re-sort the list of grouping variables.

    Does nothing when there is no dataset. When
    ``order_grouping_by_importance`` is set, the variables are sorted through
    ``_sort_list`` with a p-value based key; otherwise ``_sort_list`` is
    called with ``None`` as the key.
    """
    data = self.dataset
    if data is None:
        return
    attr = self.attribute

    def compute_stat(group):
        # This function and the one in apply_attr_sorting are similar, but
        # different in too many details, so they are kept as separate
        # functions.
        # If you discover a bug in this function, check the other one, too.
        if group is attr:
            return 3
        if group is None:
            return -1
        if attr.is_continuous:
            group_col = data.get_column_view(group)[0].astype(float)
            samples = []
            for index in range(len(group.values)):
                sample = attr_col[group_col == index]
                sample = sample[~np.isnan(sample)]
                if len(sample):
                    samples.append(sample)
            if len(samples) > 1:
                p = f_oneway(*samples)[1]
            else:
                p = 2
        else:
            p = self._chi_square(group, attr)[1]
        return 2 if math.isnan(p) else p

    if not self.order_grouping_by_importance:
        self._sort_list(self.group_vars, self.group_list, None)
        return

    if attr.is_continuous:
        # Read by compute_stat through the closure.
        attr_col = data.get_column_view(attr)[0].astype(float)
    self._sort_list(self.group_vars, self.group_list, compute_stat)
def _setup(self):
    """Rebuild the table model for the currently selected group.

    Filters ``self.source`` on its first meta column, installs a new
    ``LinkedTableModel`` with the remaining meta columns as the proxy's
    source model, then reopens the settings context, restores the selection
    and commits.
    """
    self.closeContext()
    data = self.source
    group = data.domain.metas[0]
    gvec = data.get_column_view(group)[0]

    if group.is_string:
        # String columns are matched against the combo box's display text.
        mask = gvec == self.group_cb.itemData(self.group_index, Qt.DisplayRole)
    else:
        # Otherwise the column is compared with the selected index directly.
        mask = gvec == self.group_index

    data = data[mask]
    # The first meta column (the group) is dropped from the displayed table.
    rest = data[:, data.domain.metas[1:]]
    model = LinkedTableModel(rest, parent=self)
    ref_col = rest.domain.metas.index(rest.domain[HeaderLabels.REFERENCE])
    self.view.setItemDelegateForColumn(
        ref_col, gui.LinkStyledItemDelegate(self.view))

    # Schedule the previous source model for deletion before replacing it.
    if self.proxy_model.sourceModel():
        self.proxy_model.sourceModel().deleteLater()
    self.proxy_model.setSourceModel(model)
    self.openContext(self.selected_group)
    self.set_selection()
    self.commit()
def set_source(self, data):
    # type: (Orange.data.Table) -> None
    """
    Set the source data from which to fetch the output

    The output is a subset filtered on the first meta column (group)

    The group combo box is repopulated with the group's values. If the
    previously selected group is still among them it stays selected;
    otherwise the current index is clamped to the valid range.

    Raises
    ------
    TypeError
        If the first meta column is neither a string nor a discrete variable.
    """
    self.source = data
    domain = data.domain

    if domain.metas:
        group = domain.metas[0]
        groupcol, _ = data.get_column_view(group)

        if group.is_string:
            # `unique` is resolved from the enclosing module
            # (presumably numpy.unique -- TODO confirm).
            group_values = list(map(str, unique(groupcol)))
        elif group.is_discrete:
            group_values = group.values
        else:
            raise TypeError("Invalid column type")

        # Find the previously selected group among the new values, if any.
        try:
            idx = group_values.index(self.selected_group)
        except ValueError:
            idx = -1

        self.group_cb.clear()
        self.group_cb.addItems(group_values)

        if idx != -1:
            self.group_index = idx
            self.selected_group = group_values[idx]
        elif group_values:
            # Keep the old index, clamped into the new range of values.
            self.group_index = min(max(self.group_index, 0),
                                   len(group_values) - 1)

        # NOTE(review): _setup() reads domain.metas[0], so it is kept inside
        # the `if domain.metas` branch -- confirm against the original layout.
        self._setup()
def apply_attr_sorting(self):
    """Re-sort the list of attributes.

    Does nothing when there is no dataset. When ``order_by_importance`` is
    set and a grouping variable is chosen, the attributes are sorted through
    ``_sort_list`` with a p-value based key; otherwise ``_sort_list`` is
    called with ``None`` as the key.
    """
    data = self.dataset
    if data is None:
        return
    domain = data.domain
    group_var = self.group_var

    def compute_score(attr):
        # This function and the one in apply_group_sorting are similar, but
        # different in too many details, so they are kept as separate
        # functions.
        # If you discover a bug in this function, check the other one, too.
        if attr is group_var:
            return 3
        if attr.is_continuous:
            # One-way ANOVA
            values = data.get_column_view(attr)[0].astype(float)
            samples = []
            for index in range(n_groups):
                sample = values[group_col == index]
                sample = sample[~np.isnan(sample)]
                if len(sample):
                    samples.append(sample)
            if len(samples) > 1:
                p = f_oneway(*samples)[1]
            else:
                p = 2
        else:
            p = self._chi_square(group_var, attr)[1]
        return 2 if math.isnan(p) else p

    if self.order_by_importance and group_var is not None:
        n_groups = len(group_var.values)
        # The group column is only needed for the ANOVA on continuous
        # attributes, so it is not fetched when there are none.
        if domain.has_continuous_attributes(
                include_class=True, include_metas=True):
            group_col = data.get_column_view(group_var)[0]
        else:
            group_col = None
        sort_key = compute_score
    else:
        sort_key = None
    self._sort_list(self.attrs, self.attr_list, sort_key)
def group_mask_rows(data, var, values):
    """
    Build a boolean row mask selecting the given values of a variable.

    An entry of the mask is True when the corresponding row's value of
    `var` is one of `values`.

    Parameters
    ----------
    data : Orange.data.Table
        Source data table.
    var : Orange.data.DiscreteVariable
        The variable/column on which to match `values`.
    values : sequence of str
        The values to select (must be a subset of `var.values`)
    """
    variable = data.domain[var]
    column, _ = data.get_column_view(variable)
    # Rows store value indices, so translate the names into indices first.
    wanted = [variable.values.index(name) for name in values]
    selected = numpy.zeros_like(column, dtype=bool)
    for index in wanted:
        numpy.logical_or(selected, column == index, out=selected)
    return selected
def apply_sorting(self):
    """Sort ``self.attrs`` by importance, or reset them to domain order.

    Does nothing when there is no dataset. The current ``self.attribute`` is
    remembered before the list is changed and assigned back afterwards.
    """
    data = self.dataset
    if data is None:
        return
    domain = data.domain
    attribute = self.attribute
    group_var = self.group_var

    def compute_score(attr):
        if attr is group_var:
            return 3
        if attr.is_continuous:
            # One-way ANOVA
            values = data.get_column_view(attr)[0].astype(float)
            samples = []
            for index in range(n_groups):
                sample = values[group_col == index]
                sample = sample[~np.isnan(sample)]
                if len(sample):
                    samples.append(sample)
            if len(samples) > 1:
                p = f_oneway(*samples)[1]
            else:
                p = 2
        else:
            p = self._chi_square(group_var, attr)[1]
        return 2 if math.isnan(p) else p

    if self.order_by_importance and group_var is not None:
        n_groups = len(group_var.values)
        # The group column is only needed for the ANOVA on continuous
        # attributes, so it is not fetched when there are none.
        if domain.has_continuous_attributes(
                include_class=True, include_metas=True):
            group_col = data.get_column_view(group_var)[0]
        else:
            group_col = None
        self.attrs.sort(key=compute_score)
    else:
        self.reset_attrs(domain)
    self.attribute = attribute
def apply_sorting(self):
    """Sort ``self.attrs`` by importance, or reset them to domain order.

    Does nothing when there is no dataset. Continuous attributes are scored
    with a one-way ANOVA over the groups, others with a chi-square test on
    the contingency table. The current ``self.attribute`` is remembered
    before the list is changed and assigned back afterwards.
    """
    data = self.dataset
    if data is None:
        return
    domain = data.domain
    attribute = self.attribute
    group_var = self.group_var

    def compute_score(attr):
        if attr is group_var:
            return 3
        if attr.is_continuous:
            # One-way ANOVA
            values = data.get_column_view(attr)[0].astype(float)
            samples = []
            for index in range(n_groups):
                sample = values[group_col == index]
                sample = sample[~np.isnan(sample)]
                if len(sample):
                    samples.append(sample)
            if len(samples) > 1:
                p = f_oneway(*samples)[1]
            else:
                p = 2
        else:
            # Chi-square with the given distribution into groups
            # (see degrees of freedom in computation of the p-value)
            if not (attr.values and group_var.values):
                return 2
            observed = np.array(
                contingency.get_contingency(data, group_var, attr))
            # Drop empty rows first, then empty columns of what is left.
            nonempty_rows = observed.sum(axis=1) != 0
            observed = observed[nonempty_rows, :]
            nonempty_cols = observed.sum(axis=0) != 0
            observed = observed[:, nonempty_cols]
            if min(observed.shape) < 2:
                return 2
            row_totals = observed.sum(axis=1)
            col_totals = observed.sum(axis=0)
            expected = np.outer(row_totals, col_totals) / np.sum(observed)
            p = chisquare(
                observed.ravel(), f_exp=expected.ravel(), ddof=n_groups - 1
            )[1]
        return 2 if math.isnan(p) else p

    if self.order_by_importance and group_var is not None:
        n_groups = len(group_var.values)
        # The group column is only needed for the ANOVA on continuous
        # attributes, so it is not fetched when there are none.
        if domain.has_continuous_attributes(
                include_class=True, include_metas=True):
            group_col = data.get_column_view(group_var)[0]
        else:
            group_col = None
        self.attrs.sort(key=compute_score)
    else:
        self.reset_attrs(domain)
    self.attribute = attribute
def compute_score(attr):
    """Return a sort key for `attr`: a p-value, or a sentinel.

    The grouping variable itself scores 3. Continuous attributes are scored
    with a one-way ANOVA over the groups, other attributes through
    ``self._chi_square``. The score is 2 when fewer than two groups are
    non-empty or when the p-value is NaN.

    Relies on `self`, `data`, `group_var`, `group_col` and `n_groups` from
    the enclosing scope.
    """
    if attr is group_var:
        return 3

    if attr.is_continuous:
        # One-way ANOVA
        values = data.get_column_view(attr)[0].astype(float)
        samples = []
        for index in range(n_groups):
            sample = values[group_col == index]
            sample = sample[~np.isnan(sample)]
            if len(sample):
                samples.append(sample)
        if len(samples) > 1:
            p = f_oneway(*samples)[1]
        else:
            p = 2
    else:
        p = self._chi_square(group_var, attr)[1]

    return 2 if math.isnan(p) else p
def apply_sorting(self):
    """Sort ``self.attrs`` by importance, or restore the domain order.

    Does nothing when there is no dataset. Without importance ordering (or
    without a grouping variable) the list is refilled with the domain's
    variables followed by its primitive metas. The current
    ``self.attribute`` is remembered before the list is changed and assigned
    back afterwards.
    """
    data = self.dataset
    if data is None:
        return
    domain = data.domain
    attribute = self.attribute
    group_var = self.group_var

    def compute_score(attr):
        if attr is group_var:
            return 3
        if attr.is_continuous:
            # One-way ANOVA
            values = data.get_column_view(attr)[0].astype(float)
            samples = []
            for index in range(n_groups):
                sample = values[group_col == index]
                sample = sample[~np.isnan(sample)]
                if len(sample):
                    samples.append(sample)
            if len(samples) > 1:
                p = f_oneway(*samples)[1]
            else:
                p = 2
        else:
            # Chi-square with the given distribution into groups
            # (see degrees of freedom in computation of the p-value)
            if not (attr.values and group_var.values):
                return 2
            observed = np.array(
                contingency.get_contingency(data, group_var, attr))
            # Drop empty rows first, then empty columns of what is left.
            nonempty_rows = observed.sum(axis=1) != 0
            observed = observed[nonempty_rows, :]
            nonempty_cols = observed.sum(axis=0) != 0
            observed = observed[:, nonempty_cols]
            if min(observed.shape) < 2:
                return 2
            row_totals = observed.sum(axis=1)
            col_totals = observed.sum(axis=0)
            expected = np.outer(row_totals, col_totals) / np.sum(observed)
            p = chisquare(observed.ravel(), f_exp=expected.ravel(),
                          ddof=n_groups - 1)[1]
        return 2 if math.isnan(p) else p

    if self.order_by_importance and group_var is not None:
        n_groups = len(group_var.values)
        # The group column is only needed for the ANOVA on continuous
        # attributes, so it is not fetched when there are none.
        if domain.has_continuous_attributes(
                include_class=True, include_metas=True):
            group_col = data.get_column_view(group_var)[0]
        else:
            group_col = None
        self.attrs.sort(key=compute_score)
    else:
        primitive_metas = (
            meta for meta in data.domain.metas if meta.is_primitive())
        self.attrs[:] = chain(domain.variables, primitive_metas)
    self.attribute = attribute
def __call__(self, data):
    """Return the values of ``self.variable`` with NaNs filled by the model.

    `data` is either a single ``Orange.data.Instance`` (giving a one-element
    array) or an object with ``get_column_view``, whose column is copied.
    If nothing is missing the values are returned untouched; otherwise
    ``self.model`` is applied to the instance, or to the rows with missing
    values, and its output is written into the missing positions.
    """
    single = isinstance(data, Orange.data.Instance)
    if single:
        column = numpy.array([float(data[self.variable])])
    else:
        column = numpy.array(
            data.get_column_view(self.variable)[0], copy=True)

    missing = numpy.isnan(column)
    if not numpy.any(missing):
        return column

    column[missing] = self.model(data) if single else self.model(data[missing])
    return column
def __call__(self, data):
    """Return the values of ``self.variable`` with NaNs filled by the model.

    `data` is either a single ``Orange.data.Instance`` (giving a one-element
    array) or an object with ``get_column_view``, whose column is copied.
    If nothing is missing the values are returned untouched; otherwise
    ``self.model`` is applied to the instance, or to the rows with missing
    values, and its output is written into the missing positions.
    """
    single = isinstance(data, Orange.data.Instance)
    if single:
        column = np.array([float(data[self.variable])])
    else:
        column = np.array(data.get_column_view(self.variable)[0], copy=True)

    missing = np.isnan(column)
    if not np.any(missing):
        return column

    column[missing] = self.model(data) if single else self.model(data[missing])
    return column
def __call__(self, data):
    """Return the column of ``self.variable`` with NaNs filled by the model.

    A single ``Orange.data.Instance`` is first wrapped into a one-row table.
    The column is copied; if nothing is missing the copy is returned as is.
    Otherwise ``self.model`` is applied to the rows with missing values and
    its output is written into the missing positions.
    """
    if isinstance(data, Orange.data.Instance):
        data = Orange.data.Table.from_list(data.domain, [data])
    domain = data.domain

    column = np.array(data.get_column_view(self.variable)[0], copy=True)
    missing = np.isnan(column)
    if not np.any(missing):
        return column

    if domain.class_vars:
        # cannot have class var in domain (due to backmappers in model)
        classless = Orange.data.Domain(domain.attributes, None, domain.metas)
        data = data.transform(classless)

    column[missing] = self.model(data[missing])
    return column
def group_selection_mask(data, group, indices):
    """
    Return the selection masks for the group.

    For a ``ColumnGroup`` the mask has one entry per attribute of
    ``data.domain`` and is True where the attribute carries one of the
    selected ``(group.key, value)`` annotations. For a ``RowGroup`` the mask
    is shaped like the column of ``group.var`` and is True where the entry
    equals one of `indices`. Any other `group` raises ``TypeError``.
    """
    if isinstance(group, ColumnGroup):
        chosen = [group.values[index] for index in indices]
        wanted = {(group.key, value) for value in chosen}
        flags = []
        for var in data.domain.attributes:
            annotations = set(var.attributes.items())
            flags.append(bool(annotations.intersection(wanted)))
        return numpy.array(flags, dtype=bool)

    if isinstance(group, RowGroup):
        wanted = set(indices)
        column, _ = data.get_column_view(group.var)
        selected = numpy.zeros_like(column, dtype=bool)
        for index in wanted:
            selected |= column == index
        return selected

    raise TypeError("ColumnGroup or RowGroup expected, got {}".format(type(group).__name__))
def compute_score(attr):
    """Return a sort key for `attr`: a p-value, or a sentinel.

    The grouping variable itself scores 3. Continuous attributes are scored
    with a one-way ANOVA over the groups, other attributes through
    ``self._chi_square``. The score is 2 when fewer than two groups are
    non-empty or when the p-value is NaN.

    Relies on `self`, `data`, `group_var`, `group_col` and `n_groups` from
    the enclosing scope.
    """
    # This function and the one in apply_group_sorting are similar, but
    # different in too many details, so they are kept as separate
    # functions.
    # If you discover a bug in this function, check the other one, too.
    if attr is group_var:
        return 3

    if attr.is_continuous:
        # One-way ANOVA
        values = data.get_column_view(attr)[0].astype(float)
        samples = []
        for index in range(n_groups):
            sample = values[group_col == index]
            sample = sample[~np.isnan(sample)]
            if len(sample):
                samples.append(sample)
        if len(samples) > 1:
            p = f_oneway(*samples)[1]
        else:
            p = 2
    else:
        p = self._chi_square(group_var, attr)[1]

    return 2 if math.isnan(p) else p
def group_selection_mask(data, group, indices):
    """
    Return the selection masks for the group.

    For a ``ColumnGroup`` the mask has one entry per attribute of
    ``data.domain`` and is True where the attribute carries one of the
    selected ``(group.key, value)`` annotations. For a ``RowGroup`` the mask
    is shaped like the column of ``group.var`` and is True where the entry
    equals one of `indices`. Any other `group` raises ``TypeError``.
    """
    if isinstance(group, ColumnGroup):
        chosen = [group.values[index] for index in indices]
        wanted = {(group.key, value) for value in chosen}
        flags = []
        for var in data.domain.attributes:
            annotations = set(var.attributes.items())
            flags.append(bool(annotations.intersection(wanted)))
        return numpy.array(flags, dtype=bool)

    if isinstance(group, RowGroup):
        wanted = set(indices)
        column, _ = data.get_column_view(group.var)
        selected = numpy.zeros_like(column, dtype=bool)
        for index in wanted:
            selected |= column == index
        return selected

    raise TypeError("ColumnGroup or RowGroup expected, got {}"
                    .format(type(group).__name__))
def _setup_plot(self):
    """Setup the plot with new curve data.

    Clears the graph and, for every value index of the grouping variable
    that has at least one row, adds the individual profiles, a mean curve
    and an error bar spanning the first to third quartile around the mean.
    One namespace per group (``None`` for empty groups) is stored in
    ``self.__groups`` before visibility is refreshed.
    """
    assert self.data is not None
    self.graph.clear()

    data, domain = self.data, self.data.domain
    var = domain[self.group_var]
    class_col_data, _ = data.get_column_view(var)
    # Row indices of each group, one array per entry of self.classes.
    group_indices = [
        np.flatnonzero(class_col_data == i) for i in range(len(self.classes))
    ]

    self.graph.getAxis('bottom').setTicks([[
        (i + 1, str(a)) for i, a in enumerate(self.graph_variables)
    ]])

    # The plotted variables sit at x = 1, 2, ..., matching the ticks above.
    X = np.arange(1, len(self.graph_variables) + 1)
    groups = []

    for i, indices in enumerate(group_indices):
        if len(indices) == 0:
            groups.append(None)
        else:
            if self.classes:
                color = self.class_colors[i]
            else:
                color = QColor(Qt.darkGray)
            group_data = data[indices, self.graph_variables]
            plot_x, plot_y, connect = disconnected_curve_data(
                group_data.X, x=X)

            color.setAlpha(200)
            lightcolor = QColor(color.lighter(factor=150))
            lightcolor.setAlpha(150)
            pen = QPen(color, 2)
            pen.setCosmetic(True)

            lightpen = QPen(lightcolor, 1)
            lightpen.setCosmetic(True)

            curve = pg.PlotCurveItem(
                x=plot_x, y=plot_y, connect=connect,
                pen=lightpen, symbolSize=2, antialias=True,
            )
            self.graph.addItem(curve)

            mean = np.nanmean(group_data.X, axis=0)
            meancurve = pg.PlotDataItem(
                x=X, y=mean, pen=pen, size=5, symbol="o", pxMode=True,
                symbolSize=5, antialias=True)
            self.graph.addItem(meancurve)

            q1, q3 = np.nanpercentile(group_data.X, [25, 75], axis=0)
            # TODO: implement and use a box plot item
            # The mean may lie outside [q1, q3], making a whisker length
            # negative. np.clip(x, 0, x) leaves x unchanged (where
            # a_min > a_max numpy returns a_max), so clamp with an open
            # upper bound instead.
            errorbar = pg.ErrorBarItem(
                x=X, y=mean,
                bottom=np.clip(mean - q1, 0, None),
                top=np.clip(q3 - mean, 0, None),
                beam=0.5)
            self.graph.addItem(errorbar)
            groups.append(
                namespace(data=group_data, indices=indices, profiles=curve,
                          mean=meancurve, boxplot=errorbar))

    self.__groups = groups
    self.__update_visibility()
def commit(self):
    """Send the selected and the remaining items to the widget's outputs.

    Nothing is sent when there are no items. With an empty selection both
    "Selected Data" and "Other Data" receive ``None``. Output tables are
    only built when the items are an ``Orange.data.Table``; if
    ``append_clusters`` is set, a discrete cluster variable is added in the
    role given by ``cluster_role``.
    """
    items = getattr(self.matrix, "items", self.items)
    if not items:
        # nothing to commit
        return

    selection = self.dendrogram.selected_nodes()
    # Order the selected nodes by the position of their first leaf.
    selection = sorted(selection, key=lambda c: c.value.first)

    # Item index of every leaf, in the order `leaves` yields them.
    indices = [leaf.value.index for leaf in leaves(self.root)]

    # One list of item indices per selected node.
    maps = [indices[node.value.first:node.value.last] for node in selection]

    selected_indices = list(chain(*maps))
    unselected_indices = sorted(set(range(self.root.value.last)) - set(selected_indices))

    selected = [items[k] for k in selected_indices]
    unselected = [items[k] for k in unselected_indices]

    if not selected:
        self.send("Selected Data", None)
        self.send("Other Data", None)
        return

    selected_data = unselected_data = None

    if isinstance(items, Orange.data.Table):
        # c[k] is the index of the selected node containing row k, or
        # len(maps) for rows outside the selection.
        c = numpy.zeros(len(items))

        for i, indices in enumerate(maps):
            c[indices] = i
        c[unselected_indices] = len(maps)

        mask = c != len(maps)

        if self.append_clusters:
            # "Other" is the last value, matching the len(maps) code above.
            clust_var = Orange.data.DiscreteVariable(
                str(self.cluster_name),
                values=["Cluster {}".format(i + 1)
                        for i in range(len(maps))] + ["Other"],
                ordered=True
            )
            data, domain = items, items.domain
            attrs = domain.attributes
            class_ = domain.class_vars
            metas = domain.metas

            if self.cluster_role == self.AttributeRole:
                attrs = attrs + (clust_var,)
            elif self.cluster_role == self.ClassRole:
                class_ = class_ + (clust_var,)
            elif self.cluster_role == self.MetaRole:
                metas = metas + (clust_var,)

            domain = Orange.data.Domain(attrs, class_, metas)
            data = Orange.data.Table(domain, data)
            # Fill the new column in place through its view.
            data.get_column_view(clust_var)[0][:] = c
        else:
            data = items

        if selected:
            selected_data = data[mask]
        if unselected:
            unselected_data = data[~mask]

    self.send("Selected Data", selected_data)
    self.send("Other Data", unselected_data)
def column(data, variable):
    """Return the column of `variable` in `data`, flattened with ``ravel``."""
    values, _unused = data.get_column_view(variable)
    flattened = values.ravel()
    return flattened
def __call__(self, data, variable):
    """Return a boolean array marking the NaN entries of `variable`'s column."""
    values, _unused = data.get_column_view(variable)
    missing = np.isnan(values)
    return missing
def _setup_plot(self):
    """Setup the plot with new curve data.

    Clears the graph and, for every value index of the grouping variable
    that has at least one row, adds the individual profiles, a mean curve
    and an error bar spanning the first to third quartile around the mean.
    One namespace per group (``None`` for empty groups) is stored in
    ``self.__groups`` before visibility is refreshed.
    """
    assert self.data is not None
    self.graph.clear()

    data, domain = self.data, self.data.domain
    var = domain[self.group_var]
    class_col_data, _ = data.get_column_view(var)
    # Row indices of each group, one array per entry of self.classes.
    group_indices = [np.flatnonzero(class_col_data == i)
                     for i in range(len(self.classes))]

    self.graph.getAxis('bottom').setTicks([
        [(i+1, str(a)) for i, a in enumerate(self.graph_variables)]
    ])

    # The plotted variables sit at x = 1, 2, ..., matching the ticks above.
    X = np.arange(1, len(self.graph_variables)+1)
    groups = []

    for i, indices in enumerate(group_indices):
        if len(indices) == 0:
            groups.append(None)
        else:
            if self.classes:
                color = self.class_colors[i]
            else:
                color = QColor(Qt.darkGray)
            group_data = data[indices, self.graph_variables]
            plot_x, plot_y, connect = disconnected_curve_data(
                group_data.X, x=X)

            color.setAlpha(200)
            lightcolor = QColor(color.lighter(factor=150))
            lightcolor.setAlpha(150)
            pen = QPen(color, 2)
            pen.setCosmetic(True)

            lightpen = QPen(lightcolor, 1)
            lightpen.setCosmetic(True)

            curve = pg.PlotCurveItem(
                x=plot_x, y=plot_y, connect=connect,
                pen=lightpen, symbolSize=2, antialias=True,
            )
            self.graph.addItem(curve)

            mean = np.nanmean(group_data.X, axis=0)

            meancurve = pg.PlotDataItem(
                x=X, y=mean, pen=pen, size=5, symbol="o", pxMode=True,
                symbolSize=5, antialias=True
            )
            self.graph.addItem(meancurve)

            q1, q3 = np.nanpercentile(group_data.X, [25, 75], axis=0)
            # TODO: implement and use a box plot item
            # The mean may lie outside [q1, q3], making a whisker length
            # negative. np.clip(x, 0, x) leaves x unchanged (where
            # a_min > a_max numpy returns a_max), so clamp with an open
            # upper bound instead.
            errorbar = pg.ErrorBarItem(
                x=X, y=mean,
                bottom=np.clip(mean - q1, 0, None),
                top=np.clip(q3 - mean, 0, None),
                beam=0.5
            )
            self.graph.addItem(errorbar)
            groups.append(
                namespace(
                    data=group_data, indices=indices, profiles=curve,
                    mean=meancurve, boxplot=errorbar)
            )

    self.__groups = groups
    self.__update_visibility()
def _setup_plot(self):
    """Setup the plot with new curve data.

    Rows are grouped by the class variable when it is discrete; otherwise
    all rows form a single group. For every group the individual profiles,
    per-row hover curves, a mean curve with a legend entry and an error bar
    spanning the first to third quartile around the mean are added to the
    graph. One namespace per group is stored in ``self.__groups`` before
    visibility and tooltips are refreshed.
    """
    assert self.data is not None

    legend = self.graph.plotItem.addLegend(offset=(-30, 30))

    data, domain = self.data, self.data.domain
    if is_discrete(domain.class_var):
        class_col_data, _ = data.get_column_view(domain.class_var)

        # Row indices of each class value.
        group_indices = [
            np.flatnonzero(class_col_data == i)
            for i in range(len(domain.class_var.values))
        ]
    else:
        group_indices = [np.arange(len(data))]

    # The attributes sit at x = 1, 2, ...
    X = np.arange(1, len(domain.attributes) + 1)
    groups = []

    for i, indices in enumerate(group_indices):
        if self.classes:
            color = self.class_colors[i]
        else:
            color = QColor(Qt.darkGray)
        group_data = data[indices, :]
        plot_x, plot_y, connect = disconnected_curve_data(group_data.X, x=X)

        color.setAlpha(200)
        lightcolor = QColor(color.lighter(factor=150))
        lightcolor.setAlpha(150)
        pen = QPen(color, 2)
        pen.setCosmetic(True)

        lightpen = QPen(lightcolor, 1)
        lightpen.setCosmetic(True)
        hoverpen = QPen(pen)
        hoverpen.setWidth(2)

        curve = pg.PlotCurveItem(
            x=plot_x, y=plot_y, connect=connect,
            pen=lightpen, symbolSize=2, antialias=True,
        )
        self.graph.addItem(curve)

        # One hover curve per row; each remembers its row index in `data`.
        hovercurves = []
        for index, profile in zip(indices, group_data.X):
            hcurve = HoverCurve(x=X, y=profile, pen=hoverpen,
                                antialias=True)
            hcurve.setToolTip('{}'.format(index))
            hcurve._data_index = index
            hovercurves.append(hcurve)
            self.graph.addItem(hcurve)

        mean = np.nanmean(group_data.X, axis=0)

        meancurve = pg.PlotDataItem(
            x=X, y=mean, pen=pen, size=5, symbol="o", pxMode=True,
            symbolSize=5, antialias=True)
        hoverpen = QPen(hoverpen)
        hoverpen.setWidth(5)

        # Hover curve for the mean; it is made non-selectable.
        hc = HoverCurve(x=X, y=mean, pen=hoverpen, antialias=True)
        hc.setFlag(QGraphicsItem.ItemIsSelectable, False)
        self.graph.addItem(hc)

        self.graph.addItem(meancurve)
        # NOTE(review): indexes self.classes even on the branch taken when
        # self.classes is falsy above -- confirm it is never empty here.
        legend.addItem(meancurve, " {}".format(self.classes[i]))

        q1, q3 = np.nanpercentile(group_data.X, [25, 75], axis=0)
        # TODO: implement and use a box plot item
        # The mean may lie outside [q1, q3], making a whisker length
        # negative. np.clip(x, 0, x) leaves x unchanged (where
        # a_min > a_max numpy returns a_max), so clamp with an open upper
        # bound instead.
        errorbar = pg.ErrorBarItem(
            x=X, y=mean,
            bottom=np.clip(mean - q1, 0, None),
            top=np.clip(q3 - mean, 0, None),
            beam=0.5)
        self.graph.addItem(errorbar)
        groups.append(
            namespace(data=group_data, indices=indices, profiles=curve,
                      hovercurves=hovercurves, mean=meancurve,
                      boxplot=errorbar))

    self.__groups = groups
    self.__update_visibility()
    self.__update_tooltips()
def _setup_plot(self):
    """Setup the plot with new curve data.

    Rows are grouped by the class variable when it is discrete; otherwise
    all rows form a single group. For every group the individual profiles,
    per-row hover curves, a mean curve (also appended to
    ``self.legend_items``) and an error bar spanning the first to third
    quartile around the mean are added to the graph. One namespace per
    group is stored in ``self.__groups`` before visibility and tooltips are
    refreshed.
    """
    assert self.data is not None

    data, domain = self.data, self.data.domain
    if is_discrete(domain.class_var):
        class_col_data, _ = data.get_column_view(domain.class_var)

        # Row indices of each class value.
        group_indices = [np.flatnonzero(class_col_data == i)
                         for i in range(len(domain.class_var.values))]
    else:
        group_indices = [np.arange(len(data))]

    # The attributes sit at x = 1, 2, ...
    X = np.arange(1, len(domain.attributes)+1)
    groups = []

    for i, indices in enumerate(group_indices):
        if self.classes:
            color = self.class_colors[i]
        else:
            color = QColor(Qt.darkGray)
        group_data = data[indices, :]
        plot_x, plot_y, connect = disconnected_curve_data(group_data.X, x=X)

        color.setAlpha(200)
        lightcolor = QColor(color.lighter(factor=150))
        lightcolor.setAlpha(150)
        pen = QPen(color, 2)
        pen.setCosmetic(True)

        lightpen = QPen(lightcolor, 1)
        lightpen.setCosmetic(True)
        hoverpen = QPen(pen)
        hoverpen.setWidth(2)

        curve = pg.PlotCurveItem(
            x=plot_x, y=plot_y, connect=connect,
            pen=lightpen, symbolSize=2, antialias=True,
        )
        self.graph.addItem(curve)

        # One hover curve per row; each remembers its row index in `data`.
        hovercurves = []
        for index, profile in zip(indices, group_data.X):
            hcurve = HoverCurve(x=X, y=profile, pen=hoverpen,
                                antialias=True)
            hcurve.setToolTip('{}'.format(index))
            hcurve._data_index = index
            hovercurves.append(hcurve)
            self.graph.addItem(hcurve)

        mean = np.nanmean(group_data.X, axis=0)

        meancurve = pg.PlotDataItem(
            x=X, y=mean, pen=pen, size=5, symbol="o", pxMode=True,
            symbolSize=5, antialias=True
        )
        hoverpen = QPen(hoverpen)
        hoverpen.setWidth(5)

        # Hover curve for the mean; it is made non-selectable.
        hc = HoverCurve(x=X, y=mean, pen=hoverpen, antialias=True)
        hc.setFlag(QGraphicsItem.ItemIsSelectable, False)
        self.graph.addItem(hc)

        self.graph.addItem(meancurve)
        self.legend_items.append(meancurve)

        q1, q3 = np.nanpercentile(group_data.X, [25, 75], axis=0)
        # TODO: implement and use a box plot item
        # The mean may lie outside [q1, q3], making a whisker length
        # negative. np.clip(x, 0, x) leaves x unchanged (where
        # a_min > a_max numpy returns a_max), so clamp with an open upper
        # bound instead.
        errorbar = pg.ErrorBarItem(
            x=X, y=mean,
            bottom=np.clip(mean - q1, 0, None),
            top=np.clip(q3 - mean, 0, None),
            beam=0.5
        )
        self.graph.addItem(errorbar)
        groups.append(
            namespace(
                data=group_data, indices=indices, profiles=curve,
                hovercurves=hovercurves, mean=meancurve, boxplot=errorbar)
        )

    self.__groups = groups
    self.__update_visibility()
    self.__update_tooltips()
def commit(self):
    """Send the selected and the remaining items to the widget's outputs.

    Nothing is sent when there are no items. With an empty selection both
    "Selected Data" and "Other Data" receive ``None``. For table items the
    split is done over rows when ``self.matrix.axis == 1`` and over columns
    when it is 0; if ``append_clusters`` is set (rows only), a discrete
    cluster variable is added in the role given by ``cluster_role``.
    """
    items = getattr(self.matrix, "items", self.items)
    if not items:
        # nothing to commit
        return

    selection = self.dendrogram.selected_nodes()
    # Order the selected nodes by the position of their first leaf.
    selection = sorted(selection, key=lambda c: c.value.first)

    # Item index of every leaf, in the order `leaves` yields them.
    indices = [leaf.value.index for leaf in leaves(self.root)]

    # One list of item indices per selected node.
    maps = [
        indices[node.value.first:node.value.last] for node in selection
    ]

    selected_indices = list(chain(*maps))
    unselected_indices = sorted(
        set(range(self.root.value.last)) - set(selected_indices))

    if not selected_indices:
        self.send("Selected Data", None)
        self.send("Other Data", None)
        return

    selected_data = unselected_data = None

    if isinstance(items, Orange.data.Table) and self.matrix.axis == 1:
        # Select rows
        # c[k] is the index of the selected node containing row k, or
        # len(maps) for rows outside the selection.
        c = numpy.zeros(self.matrix.X.shape[0])

        for i, indices in enumerate(maps):
            c[indices] = i
        c[unselected_indices] = len(maps)

        mask = c != len(maps)

        if self.append_clusters:
            # "Other" is the last value, matching the len(maps) code above.
            clust_var = Orange.data.DiscreteVariable(
                str(self.cluster_name),
                values=[
                    "Cluster {}".format(i + 1) for i in range(len(maps))
                ] + ["Other"])
            data, domain = items, items.domain
            attrs = domain.attributes
            class_ = domain.class_vars
            metas = domain.metas

            if self.cluster_role == self.AttributeRole:
                attrs = attrs + (clust_var, )
            elif self.cluster_role == self.ClassRole:
                class_ = class_ + (clust_var, )
            elif self.cluster_role == self.MetaRole:
                metas = metas + (clust_var, )

            domain = Orange.data.Domain(attrs, class_, metas)
            data = Orange.data.Table.from_table(domain, items)
            # Fill the new column in place through its view.
            data.get_column_view(clust_var)[0][:] = c
        else:
            data = items

        if selected_indices:
            selected_data = data[mask]
        if unselected_indices:
            unselected_data = data[~mask]

    elif isinstance(items, Orange.data.Table) and self.matrix.axis == 0:
        # Select columns
        # Class variables and metas are kept in both output tables.
        domain = Orange.data.Domain(
            [items.domain[i] for i in selected_indices],
            items.domain.class_vars, items.domain.metas)
        selected_data = items.from_table(domain, items)
        domain = Orange.data.Domain(
            [items.domain[i] for i in unselected_indices],
            items.domain.class_vars, items.domain.metas)
        unselected_data = items.from_table(domain, items)

    self.send("Selected Data", selected_data)
    self.send("Other Data", unselected_data)
def commit(self):
    """Send the selected and the annotated data to the widget's outputs.

    Without items both outputs receive ``None``. With an empty selection the
    selected output is ``None`` and the annotated output is either an
    annotated table with no selection or ``None``. For table items the
    selection applies to rows when ``self.matrix.axis == 1`` (a "Cluster"
    meta variable is added) and to columns when it is 0 (each copied
    attribute gets a ``"cluster"`` annotation).
    """
    items = getattr(self.matrix, "items", self.items)
    if not items:
        self.Outputs.selected_data.send(None)
        self.Outputs.annotated_data.send(None)
        return

    selection = self.dendrogram.selected_nodes()
    # Order the selected nodes by the position of their first leaf.
    selection = sorted(selection, key=lambda c: c.value.first)

    # Item index of every leaf, in the order `leaves` yields them.
    indices = [leaf.value.index for leaf in leaves(self.root)]

    # One list of item indices per selected node.
    maps = [
        indices[node.value.first:node.value.last] for node in selection
    ]

    selected_indices = list(chain(*maps))
    unselected_indices = sorted(
        set(range(self.root.value.last)) - set(selected_indices))

    if not selected_indices:
        self.Outputs.selected_data.send(None)
        annotated_data = create_annotated_table(items, []) \
            if self.selection_method == 0 and self.matrix.axis else None
        self.Outputs.annotated_data.send(annotated_data)
        return

    selected_data = None

    # NOTE(review): annotated_data is only bound inside the two table
    # branches below; if neither matches, the final send raises NameError.
    if isinstance(items, Orange.data.Table) and self.matrix.axis == 1:
        # Select rows
        # c[k] is the index of the selected node containing row k, or
        # len(maps) for rows outside the selection.
        c = np.zeros(self.matrix.shape[0])

        for i, indices in enumerate(maps):
            c[indices] = i
        c[unselected_indices] = len(maps)

        mask = c != len(maps)

        data, domain = items, items.domain
        attrs = domain.attributes
        classes = domain.class_vars
        metas = domain.metas

        var_name = get_unique_names(domain, "Cluster")
        values = [f"C{i + 1}" for i in range(len(maps))]

        # "Other" is the last value, matching the len(maps) code above.
        clust_var = Orange.data.DiscreteVariable(
            var_name, values=values + ["Other"])
        domain = Orange.data.Domain(attrs, classes, metas + (clust_var, ))
        data = items.transform(domain)
        # Fill the new meta column in place through its view.
        with data.unlocked(data.metas):
            data.get_column_view(clust_var)[0][:] = c

        if selected_indices:
            selected_data = data[mask]
            # The selected table gets a cluster variable without "Other".
            clust_var = Orange.data.DiscreteVariable(
                var_name, values=values)
            selected_data.domain = Domain(
                attrs, classes, metas + (clust_var, ))

        annotated_data = create_annotated_table(data, selected_indices)

    elif isinstance(items, Orange.data.Table) and self.matrix.axis == 0:
        # Select columns
        # Selected clusters are numbered from 1; unselected columns come
        # last and get cluster 0.
        attrs = []
        for clust, indices in chain(enumerate(maps, start=1),
                                    [(0, unselected_indices)]):
            for i in indices:
                attr = items.domain[i].copy()
                attr.attributes["cluster"] = clust
                attrs.append(attr)
        domain = Orange.data.Domain(
            # len(unselected_indices) can be 0
            attrs[:len(attrs) - len(unselected_indices)],
            items.domain.class_vars, items.domain.metas)
        selected_data = items.from_table(domain, items)

        domain = Orange.data.Domain(
            attrs,
            items.domain.class_vars, items.domain.metas)
        annotated_data = items.from_table(domain, items)

    self.Outputs.selected_data.send(selected_data)
    self.Outputs.annotated_data.send(annotated_data)