class OWFeatureConstructor(OWWidget): name = "Feature Constructor" description = "Construct new features (data columns) from a set of " \ "existing features in the input data set." icon = "icons/FeatureConstructor.svg" inputs = [("Data", Orange.data.Table, "setData")] outputs = [("Data", Orange.data.Table)] want_main_area = False settingsHandler = FeatureConstructorSettingsHandler() descriptors = ContextSetting([]) currentIndex = ContextSetting(-1) EDITORS = [ (ContinuousDescriptor, ContinuousFeatureEditor), (DiscreteDescriptor, DiscreteFeatureEditor), (StringDescriptor, StringFeatureEditor) ] class Error(OWWidget.Error): more_values_needed = Msg("Discrete feature {} needs more values.") invalid_expressions = Msg("Invalid expressions: {}.") def __init__(self): super().__init__() self.data = None self.editors = {} box = gui.vBox(self.controlArea, "Variable Definitions") toplayout = QHBoxLayout() toplayout.setContentsMargins(0, 0, 0, 0) box.layout().addLayout(toplayout) self.editorstack = QStackedWidget( sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.MinimumExpanding) ) for descclass, editorclass in self.EDITORS: editor = editorclass() editor.featureChanged.connect(self._on_modified) self.editors[descclass] = editor self.editorstack.addWidget(editor) self.editorstack.setEnabled(False) buttonlayout = QVBoxLayout(spacing=10) buttonlayout.setContentsMargins(0, 0, 0, 0) self.addbutton = QPushButton( "New", toolTip="Create a new variable", minimumWidth=120, shortcut=QKeySequence.New ) def unique_name(fmt, reserved): candidates = (fmt.format(i) for i in count(1)) return next(c for c in candidates if c not in reserved) def reserved_names(): varnames = [] if self.data is not None: varnames = [var.name for var in self.data.domain.variables + self.data.domain.metas] varnames += [desc.name for desc in self.featuremodel] return set(varnames) def generate_newname(fmt): return unique_name(fmt, reserved_names()) menu = QMenu(self.addbutton) cont = menu.addAction("Continuous") cont.triggered.connect( lambda: self.addFeature( ContinuousDescriptor(generate_newname("X{}"), "", 3)) ) disc = menu.addAction("Discrete") disc.triggered.connect( lambda: self.addFeature( DiscreteDescriptor(generate_newname("D{}"), "", ("A", "B"), -1, False)) ) string = menu.addAction("String") string.triggered.connect( lambda: self.addFeature( StringDescriptor(generate_newname("S{}"), "")) ) menu.addSeparator() self.duplicateaction = menu.addAction("Duplicate Selected Variable") self.duplicateaction.triggered.connect(self.duplicateFeature) self.duplicateaction.setEnabled(False) self.addbutton.setMenu(menu) self.removebutton = QPushButton( "Remove", toolTip="Remove selected variable", minimumWidth=120, shortcut=QKeySequence.Delete ) self.removebutton.clicked.connect(self.removeSelectedFeature) buttonlayout.addWidget(self.addbutton) buttonlayout.addWidget(self.removebutton) buttonlayout.addStretch(10) toplayout.addLayout(buttonlayout, 0) toplayout.addWidget(self.editorstack, 10) # Layout for the list view layout = QVBoxLayout(spacing=1, margin=0) self.featuremodel = DescriptorModel(parent=self) self.featureview = QListView( minimumWidth=200, sizePolicy=QSizePolicy(QSizePolicy.Minimum, QSizePolicy.MinimumExpanding) ) self.featureview.setItemDelegate(FeatureItemDelegate(self)) self.featureview.setModel(self.featuremodel) self.featureview.selectionModel().selectionChanged.connect( self._on_selectedVariableChanged ) layout.addWidget(self.featureview) box.layout().addLayout(layout, 1) box = gui.hBox(self.controlArea) box.layout().addWidget(self.report_button) self.report_button.setMinimumWidth(180) gui.rubber(box) commit = gui.button(box, self, "Send", callback=self.apply, default=True) commit.setMinimumWidth(180) def setCurrentIndex(self, index): index = min(index, len(self.featuremodel) - 1) self.currentIndex = index if index >= 0: itemmodels.select_row(self.featureview, index) desc = self.featuremodel[min(index, len(self.featuremodel) - 1)] editor = self.editors[type(desc)] self.editorstack.setCurrentWidget(editor) editor.setEditorData(desc, self.data.domain if self.data else None) self.editorstack.setEnabled(index >= 0) self.duplicateaction.setEnabled(index >= 0) self.removebutton.setEnabled(index >= 0) def _on_selectedVariableChanged(self, selected, *_): index = selected_row(self.featureview) if index is not None: self.setCurrentIndex(index) else: self.setCurrentIndex(-1) def _on_modified(self): if self.currentIndex >= 0: editor = self.editorstack.currentWidget() self.featuremodel[self.currentIndex] = editor.editorData() self.descriptors = list(self.featuremodel) def setDescriptors(self, descriptors): """ Set a list of variable descriptors to edit. """ self.descriptors = descriptors self.featuremodel[:] = list(self.descriptors) @check_sql_input def setData(self, data=None): """Set the input dataset.""" self.closeContext() self.data = data if self.data is not None: descriptors = list(self.descriptors) currindex = self.currentIndex self.descriptors = [] self.currentIndex = -1 self.openContext(data) if descriptors != self.descriptors or \ self.currentIndex != currindex: # disconnect from the selection model while reseting the model selmodel = self.featureview.selectionModel() selmodel.selectionChanged.disconnect( self._on_selectedVariableChanged) self.featuremodel[:] = list(self.descriptors) self.setCurrentIndex(self.currentIndex) selmodel.selectionChanged.connect( self._on_selectedVariableChanged) self.editorstack.setEnabled(self.currentIndex >= 0) def handleNewSignals(self): if self.data is not None: self.apply() else: self.send("Data", None) def addFeature(self, descriptor): self.featuremodel.append(descriptor) self.setCurrentIndex(len(self.featuremodel) - 1) editor = self.editorstack.currentWidget() editor.nameedit.setFocus() editor.nameedit.selectAll() def removeFeature(self, index): del self.featuremodel[index] index = selected_row(self.featureview) if index is not None: self.setCurrentIndex(index) elif index is None and len(self.featuremodel) > 0: # Deleting the last item clears selection self.setCurrentIndex(len(self.featuremodel) - 1) def removeSelectedFeature(self): if self.currentIndex >= 0: self.removeFeature(self.currentIndex) def duplicateFeature(self): desc = self.featuremodel[self.currentIndex] self.addFeature(copy.deepcopy(desc)) def check_attrs_values(self, attr, data): for i in range(len(data)): for var in attr: if not math.isnan(data[i, var]) \ and int(data[i, var]) >= len(var.values): return var.name return None def _validate_descriptors(self, desc): def validate(source): try: return validate_exp(ast.parse(source, mode="eval")) except Exception: return False final = [] invalid = [] for d in desc: if validate(d.expression): final.append(d) else: final.append(d._replace(expression="")) invalid.append(d) if invalid: self.Error.invalid_expressions(", ".join(s.name for s in invalid)) return final def apply(self): self.Error.clear() if self.data is None: return desc = list(self.featuremodel) desc = self._validate_descriptors(desc) source_vars = tuple(self.data.domain) + self.data.domain.metas new_variables = construct_variables(desc, source_vars) attrs = [var for var in new_variables if var.is_primitive()] metas = [var for var in new_variables if not var.is_primitive()] new_domain = Orange.data.Domain( self.data.domain.attributes + tuple(attrs), self.data.domain.class_vars, metas=self.data.domain.metas + tuple(metas) ) try: data = Orange.data.Table(new_domain, self.data) except Exception as err: log = logging.getLogger(__name__) log.error("", exc_info=True) self.error("".join(format_exception_only(type(err), err)).rstrip()) return disc_attrs_not_ok = self.check_attrs_values( [var for var in attrs if var.is_discrete], data) if disc_attrs_not_ok: self.Error.more_values_needed(disc_attrs_not_ok) return self.send("Data", data) def send_report(self): items = OrderedDict() for feature in self.featuremodel: if isinstance(feature, DiscreteDescriptor): items[feature.name] = "{} (discrete with values {}{})".format( feature.expression, feature.values, "; ordered" * feature.ordered) elif isinstance(feature, ContinuousDescriptor): items[feature.name] = "{} (numeric)".format(feature.expression) else: items[feature.name] = "{} (text)".format(feature.expression) self.report_items( report.plural("Constructed feature{s}", len(items)), items)
class OWFeatureConstructor(OWWidget): name = "特征构造器(Feature Constructor)" description = "用输入数据集中的现有特征构造新特征。" icon = "icons/FeatureConstructor.svg" keywords = ['tezheng', 'gouzao', 'tezhenggouzao'] category = "数据(Data)" icon = "icons/FeatureConstructor.svg" class Inputs: data = Input("数据(Data)", Orange.data.Table, replaces=['Data']) class Outputs: data = Output("数据(Data)", Orange.data.Table, replaces=['Data']) want_main_area = False settingsHandler = FeatureConstructorHandler() descriptors = ContextSetting([]) currentIndex = ContextSetting(-1) expressions_with_values = ContextSetting(False) settings_version = 2 EDITORS = [(ContinuousDescriptor, ContinuousFeatureEditor), (DateTimeDescriptor, DateTimeFeatureEditor), (DiscreteDescriptor, DiscreteFeatureEditor), (StringDescriptor, StringFeatureEditor)] class Error(OWWidget.Error): more_values_needed = Msg("Categorical feature {} needs more values.") invalid_expressions = Msg("Invalid expressions: {}.") class Warning(OWWidget.Warning): renamed_var = Msg("Recently added variable has been renamed, " "to avoid duplicates.\n") def __init__(self): super().__init__() self.data = None self.editors = {} box = gui.vBox(self.controlArea, "变量定义") toplayout = QHBoxLayout() toplayout.setContentsMargins(0, 0, 0, 0) box.layout().addLayout(toplayout) self.editorstack = QStackedWidget(sizePolicy=QSizePolicy( QSizePolicy.MinimumExpanding, QSizePolicy.MinimumExpanding)) for descclass, editorclass in self.EDITORS: editor = editorclass() editor.featureChanged.connect(self._on_modified) self.editors[descclass] = editor self.editorstack.addWidget(editor) self.editorstack.setEnabled(False) buttonlayout = QVBoxLayout(spacing=10) buttonlayout.setContentsMargins(0, 0, 0, 0) self.addbutton = QPushButton("新建", toolTip="Create a new variable", minimumWidth=120, shortcut=QKeySequence.New) def unique_name(fmt, reserved): candidates = (fmt.format(i) for i in count(1)) return next(c for c in candidates if c not in reserved) def generate_newname(fmt): return unique_name(fmt, self.reserved_names()) menu = QMenu(self.addbutton) cont = menu.addAction("数值数据") cont.triggered.connect(lambda: self.addFeature( ContinuousDescriptor(generate_newname("X{}"), "", 3))) disc = menu.addAction("分类数据") disc.triggered.connect(lambda: self.addFeature( DiscreteDescriptor(generate_newname("D{}"), "", (), False))) string = menu.addAction("文本") string.triggered.connect(lambda: self.addFeature( StringDescriptor(generate_newname("S{}"), ""))) datetime = menu.addAction("日期/时间") datetime.triggered.connect(lambda: self.addFeature( DateTimeDescriptor(generate_newname("T{}"), ""))) menu.addSeparator() self.duplicateaction = menu.addAction("复制选中变量") self.duplicateaction.triggered.connect(self.duplicateFeature) self.duplicateaction.setEnabled(False) self.addbutton.setMenu(menu) self.removebutton = QPushButton("删除", toolTip="删除选中变量", minimumWidth=120, shortcut=QKeySequence.Delete) self.removebutton.clicked.connect(self.removeSelectedFeature) buttonlayout.addWidget(self.addbutton) buttonlayout.addWidget(self.removebutton) buttonlayout.addStretch(10) toplayout.addLayout(buttonlayout, 0) toplayout.addWidget(self.editorstack, 10) # Layout for the list view layout = QVBoxLayout(spacing=1, margin=0) self.featuremodel = DescriptorModel(parent=self) self.featureview = QListView(minimumWidth=200, minimumHeight=50, sizePolicy=QSizePolicy( QSizePolicy.Minimum, QSizePolicy.MinimumExpanding)) self.featureview.setItemDelegate(FeatureItemDelegate(self)) self.featureview.setModel(self.featuremodel) self.featureview.selectionModel().selectionChanged.connect( self._on_selectedVariableChanged) layout.addWidget(self.featureview) box.layout().addLayout(layout, 1) self.fix_button = gui.button(self.buttonsArea, self, "Upgrade Expressions", callback=self.fix_expressions) self.fix_button.setHidden(True) gui.button(self.buttonsArea, self, "Send", callback=self.apply, default=True) def setCurrentIndex(self, index): index = min(index, len(self.featuremodel) - 1) self.currentIndex = index if index >= 0: itemmodels.select_row(self.featureview, index) desc = self.featuremodel[min(index, len(self.featuremodel) - 1)] editor = self.editors[type(desc)] self.editorstack.setCurrentWidget(editor) editor.setEditorData(desc, self.data.domain if self.data else None) self.editorstack.setEnabled(index >= 0) self.duplicateaction.setEnabled(index >= 0) self.removebutton.setEnabled(index >= 0) def _on_selectedVariableChanged(self, selected, *_): index = selected_row(self.featureview) if index is not None: self.setCurrentIndex(index) else: self.setCurrentIndex(-1) def _on_modified(self): if self.currentIndex >= 0: self.Warning.clear() editor = self.editorstack.currentWidget() proposed = editor.editorData().name uniq = get_unique_names(self.reserved_names(self.currentIndex), proposed) feature = editor.editorData() if editor.editorData().name != uniq: self.Warning.renamed_var() feature = feature.__class__(uniq, *feature[1:]) self.featuremodel[self.currentIndex] = feature self.descriptors = list(self.featuremodel) def setDescriptors(self, descriptors): """ Set a list of variable descriptors to edit. """ self.descriptors = descriptors self.featuremodel[:] = list(self.descriptors) def reserved_names(self, idx_=None): varnames = [] if self.data is not None: varnames = [ var.name for var in self.data.domain.variables + self.data.domain.metas ] varnames += [ desc.name for idx, desc in enumerate(self.featuremodel) if idx != idx_ ] return set(varnames) @Inputs.data @check_sql_input def setData(self, data=None): """Set the input dataset.""" self.closeContext() self.data = data self.expressions_with_values = False self.descriptors = [] self.currentIndex = -1 if self.data is not None: self.openContext(data) # disconnect from the selection model while reseting the model selmodel = self.featureview.selectionModel() selmodel.selectionChanged.disconnect(self._on_selectedVariableChanged) self.featuremodel[:] = list(self.descriptors) self.setCurrentIndex(self.currentIndex) selmodel.selectionChanged.connect(self._on_selectedVariableChanged) self.fix_button.setHidden(not self.expressions_with_values) self.editorstack.setEnabled(self.currentIndex >= 0) def handleNewSignals(self): if self.data is not None: self.apply() else: self.Outputs.data.send(None) self.fix_button.setHidden(True) def addFeature(self, descriptor): self.featuremodel.append(descriptor) self.setCurrentIndex(len(self.featuremodel) - 1) editor = self.editorstack.currentWidget() editor.nameedit.setFocus() editor.nameedit.selectAll() def removeFeature(self, index): del self.featuremodel[index] index = selected_row(self.featureview) if index is not None: self.setCurrentIndex(index) elif index is None and self.featuremodel.rowCount(): # Deleting the last item clears selection self.setCurrentIndex(self.featuremodel.rowCount() - 1) def removeSelectedFeature(self): if self.currentIndex >= 0: self.removeFeature(self.currentIndex) def duplicateFeature(self): desc = self.featuremodel[self.currentIndex] self.addFeature(copy.deepcopy(desc)) @staticmethod def check_attrs_values(attr, data): for i in range(len(data)): for var in attr: if not math.isnan(data[i, var]) \ and int(data[i, var]) >= len(var.values): return var.name return None def _validate_descriptors(self, desc): def validate(source): try: return validate_exp(ast.parse(source, mode="eval")) # ast.parse can return arbitrary errors, not only SyntaxError # pylint: disable=broad-except except Exception: return False final = [] invalid = [] for d in desc: if validate(d.expression): final.append(d) else: final.append(d._replace(expression="")) invalid.append(d) if invalid: self.Error.invalid_expressions(", ".join(s.name for s in invalid)) return final def apply(self): def report_error(err): log = logging.getLogger(__name__) log.error("", exc_info=True) self.error("".join(format_exception_only(type(err), err)).rstrip()) self.Error.clear() if self.data is None: return desc = list(self.featuremodel) desc = self._validate_descriptors(desc) try: new_variables = construct_variables(desc, self.data, self.expressions_with_values) # user's expression can contain arbitrary errors except Exception as err: # pylint: disable=broad-except report_error(err) return attrs = [var for var in new_variables if var.is_primitive()] metas = [var for var in new_variables if not var.is_primitive()] new_domain = Orange.data.Domain( self.data.domain.attributes + tuple(attrs), self.data.domain.class_vars, metas=self.data.domain.metas + tuple(metas)) try: for variable in new_variables: variable.compute_value.mask_exceptions = False data = self.data.transform(new_domain) # user's expression can contain arbitrary errors # pylint: disable=broad-except except Exception as err: report_error(err) return finally: for variable in new_variables: variable.compute_value.mask_exceptions = True disc_attrs_not_ok = self.check_attrs_values( [var for var in attrs if var.is_discrete], data) if disc_attrs_not_ok: self.Error.more_values_needed(disc_attrs_not_ok) return self.Outputs.data.send(data) def send_report(self): items = OrderedDict() for feature in self.featuremodel: if isinstance(feature, DiscreteDescriptor): desc = "categorical" if feature.values: desc += " with values " \ + ", ".join(f"'{val}'" for val in feature.values) if feature.ordered: desc += "; ordered" elif isinstance(feature, ContinuousDescriptor): desc = "numeric" elif isinstance(feature, DateTimeDescriptor): desc = "date/time" else: desc = "text" items[feature.name] = f"{feature.expression} ({desc})" self.report_items(report.plural("Constructed feature{s}", len(items)), items) def fix_expressions(self): dlg = QMessageBox( QMessageBox.Question, "Fix Expressions", "This widget's behaviour has changed. Values of categorical " "variables are now inserted as their textual representations " "(strings); previously they appeared as integer numbers, with an " "attribute '.value' that contained the text.\n\n" "The widget currently runs in compatibility mode. After " "expressions are updated, manually check for their correctness.") dlg.addButton("Update", QMessageBox.ApplyRole) dlg.addButton("Cancel", QMessageBox.RejectRole) if dlg.exec() == QMessageBox.RejectRole: return def fixer(mo): var = domain[mo.group(2)] if mo.group(3) == ".value": # uses string; remove `.value` return "".join(mo.group(1, 2, 4)) # Uses ints: get them by indexing return mo.group(1) + "{" + \ ", ".join(f"'{val}': {i}" for i, val in enumerate(var.values)) + \ f"}}[{var.name}]" + mo.group(4) domain = self.data.domain disc_vars = "|".join(f"{var.name}" for var in chain(domain.variables, domain.metas) if var.is_discrete) expr = re.compile(r"(^|\W)(" + disc_vars + r")(\.value)?(\W|$)") self.descriptors[:] = [ descriptor._replace( expression=expr.sub(fixer, descriptor.expression)) for descriptor in self.descriptors ] self.expressions_with_values = False self.fix_button.hide() index = self.currentIndex self.featuremodel[:] = list(self.descriptors) self.setCurrentIndex(index) self.apply() @classmethod def migrate_context(cls, context, version): if version is None or version < 2: used_vars = set( chain(*( freevars(ast.parse(descriptor.expression, mode="eval"), []) for descriptor in context.values["descriptors"] if descriptor.expression))) disc_vars = { name for (name, vtype) in chain(context.attributes.items(), context.metas.items()) if vtype == 1 } if used_vars & disc_vars: context.values["expressions_with_values"] = True
class OWFeatureConstructor(OWWidget): name = "Feature Constructor" description = "Construct new features (data columns) from a set of " \ "existing features in the input data set." icon = "icons/FeatureConstructor.svg" class Inputs: data = Input("Data", Orange.data.Table) class Outputs: data = Output("Data", Orange.data.Table) want_main_area = False settingsHandler = FeatureConstructorHandler() descriptors = ContextSetting([]) currentIndex = ContextSetting(-1) EDITORS = [ (ContinuousDescriptor, ContinuousFeatureEditor), (DiscreteDescriptor, DiscreteFeatureEditor), (StringDescriptor, StringFeatureEditor) ] class Error(OWWidget.Error): more_values_needed = Msg("Categorical feature {} needs more values.") invalid_expressions = Msg("Invalid expressions: {}.") def __init__(self): super().__init__() self.data = None self.editors = {} box = gui.vBox(self.controlArea, "Variable Definitions") toplayout = QHBoxLayout() toplayout.setContentsMargins(0, 0, 0, 0) box.layout().addLayout(toplayout) self.editorstack = QStackedWidget( sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.MinimumExpanding) ) for descclass, editorclass in self.EDITORS: editor = editorclass() editor.featureChanged.connect(self._on_modified) self.editors[descclass] = editor self.editorstack.addWidget(editor) self.editorstack.setEnabled(False) buttonlayout = QVBoxLayout(spacing=10) buttonlayout.setContentsMargins(0, 0, 0, 0) self.addbutton = QPushButton( "New", toolTip="Create a new variable", minimumWidth=120, shortcut=QKeySequence.New ) def unique_name(fmt, reserved): candidates = (fmt.format(i) for i in count(1)) return next(c for c in candidates if c not in reserved) def reserved_names(): varnames = [] if self.data is not None: varnames = [var.name for var in self.data.domain.variables + self.data.domain.metas] varnames += [desc.name for desc in self.featuremodel] return set(varnames) def generate_newname(fmt): return unique_name(fmt, reserved_names()) menu = QMenu(self.addbutton) cont = menu.addAction("Numeric") cont.triggered.connect( lambda: self.addFeature( ContinuousDescriptor(generate_newname("X{}"), "", 3)) ) disc = menu.addAction("Categorical") disc.triggered.connect( lambda: self.addFeature( DiscreteDescriptor(generate_newname("D{}"), "", ("A", "B"), -1, False)) ) string = menu.addAction("Text") string.triggered.connect( lambda: self.addFeature( StringDescriptor(generate_newname("S{}"), "")) ) menu.addSeparator() self.duplicateaction = menu.addAction("Duplicate Selected Variable") self.duplicateaction.triggered.connect(self.duplicateFeature) self.duplicateaction.setEnabled(False) self.addbutton.setMenu(menu) self.removebutton = QPushButton( "Remove", toolTip="Remove selected variable", minimumWidth=120, shortcut=QKeySequence.Delete ) self.removebutton.clicked.connect(self.removeSelectedFeature) buttonlayout.addWidget(self.addbutton) buttonlayout.addWidget(self.removebutton) buttonlayout.addStretch(10) toplayout.addLayout(buttonlayout, 0) toplayout.addWidget(self.editorstack, 10) # Layout for the list view layout = QVBoxLayout(spacing=1, margin=0) self.featuremodel = DescriptorModel(parent=self) self.featureview = QListView( minimumWidth=200, sizePolicy=QSizePolicy(QSizePolicy.Minimum, QSizePolicy.MinimumExpanding) ) self.featureview.setItemDelegate(FeatureItemDelegate(self)) self.featureview.setModel(self.featuremodel) self.featureview.selectionModel().selectionChanged.connect( self._on_selectedVariableChanged ) layout.addWidget(self.featureview) box.layout().addLayout(layout, 1) box = gui.hBox(self.controlArea) gui.rubber(box) commit = gui.button(box, self, "Send", callback=self.apply, default=True) commit.setMinimumWidth(180) def setCurrentIndex(self, index): index = min(index, len(self.featuremodel) - 1) self.currentIndex = index if index >= 0: itemmodels.select_row(self.featureview, index) desc = self.featuremodel[min(index, len(self.featuremodel) - 1)] editor = self.editors[type(desc)] self.editorstack.setCurrentWidget(editor) editor.setEditorData(desc, self.data.domain if self.data else None) self.editorstack.setEnabled(index >= 0) self.duplicateaction.setEnabled(index >= 0) self.removebutton.setEnabled(index >= 0) def _on_selectedVariableChanged(self, selected, *_): index = selected_row(self.featureview) if index is not None: self.setCurrentIndex(index) else: self.setCurrentIndex(-1) def _on_modified(self): if self.currentIndex >= 0: editor = self.editorstack.currentWidget() self.featuremodel[self.currentIndex] = editor.editorData() self.descriptors = list(self.featuremodel) def setDescriptors(self, descriptors): """ Set a list of variable descriptors to edit. """ self.descriptors = descriptors self.featuremodel[:] = list(self.descriptors) @Inputs.data @check_sql_input def setData(self, data=None): """Set the input dataset.""" self.closeContext() self.data = data if self.data is not None: descriptors = list(self.descriptors) currindex = self.currentIndex self.descriptors = [] self.currentIndex = -1 self.openContext(data) if descriptors != self.descriptors or \ self.currentIndex != currindex: # disconnect from the selection model while reseting the model selmodel = self.featureview.selectionModel() selmodel.selectionChanged.disconnect( self._on_selectedVariableChanged) self.featuremodel[:] = list(self.descriptors) self.setCurrentIndex(self.currentIndex) selmodel.selectionChanged.connect( self._on_selectedVariableChanged) self.editorstack.setEnabled(self.currentIndex >= 0) def handleNewSignals(self): if self.data is not None: self.apply() else: self.Outputs.data.send(None) def addFeature(self, descriptor): self.featuremodel.append(descriptor) self.setCurrentIndex(len(self.featuremodel) - 1) editor = self.editorstack.currentWidget() editor.nameedit.setFocus() editor.nameedit.selectAll() def removeFeature(self, index): del self.featuremodel[index] index = selected_row(self.featureview) if index is not None: self.setCurrentIndex(index) elif index is None and len(self.featuremodel) > 0: # Deleting the last item clears selection self.setCurrentIndex(len(self.featuremodel) - 1) def removeSelectedFeature(self): if self.currentIndex >= 0: self.removeFeature(self.currentIndex) def duplicateFeature(self): desc = self.featuremodel[self.currentIndex] self.addFeature(copy.deepcopy(desc)) def check_attrs_values(self, attr, data): for i in range(len(data)): for var in attr: if not math.isnan(data[i, var]) \ and int(data[i, var]) >= len(var.values): return var.name return None def _validate_descriptors(self, desc): def validate(source): try: return validate_exp(ast.parse(source, mode="eval")) except Exception: return False final = [] invalid = [] for d in desc: if validate(d.expression): final.append(d) else: final.append(d._replace(expression="")) invalid.append(d) if invalid: self.Error.invalid_expressions(", ".join(s.name for s in invalid)) return final def apply(self): self.Error.clear() if self.data is None: return desc = list(self.featuremodel) desc = self._validate_descriptors(desc) source_vars = self.data.domain.variables + self.data.domain.metas new_variables = construct_variables(desc, source_vars) attrs = [var for var in new_variables if var.is_primitive()] metas = [var for var in new_variables if not var.is_primitive()] new_domain = Orange.data.Domain( self.data.domain.attributes + tuple(attrs), self.data.domain.class_vars, metas=self.data.domain.metas + tuple(metas) ) try: data = self.data.transform(new_domain) except Exception as err: log = logging.getLogger(__name__) log.error("", exc_info=True) self.error("".join(format_exception_only(type(err), err)).rstrip()) return disc_attrs_not_ok = self.check_attrs_values( [var for var in attrs if var.is_discrete], data) if disc_attrs_not_ok: self.Error.more_values_needed(disc_attrs_not_ok) return self.Outputs.data.send(data) def send_report(self): items = OrderedDict() for feature in self.featuremodel: if isinstance(feature, DiscreteDescriptor): items[feature.name] = "{} (categorical with values {}{})".format( feature.expression, feature.values, "; ordered" * feature.ordered) elif isinstance(feature, ContinuousDescriptor): items[feature.name] = "{} (numeric)".format(feature.expression) else: items[feature.name] = "{} (text)".format(feature.expression) self.report_items( report.plural("Constructed feature{s}", len(items)), items)
class OWFeatureConstructor(OWWidget): name = "Feature Constructor" description = "Construct new features (data columns) from a set of " \ "existing features in the input dataset." icon = "icons/FeatureConstructor.svg" keywords = ['function', 'lambda'] class Inputs: data = Input("Data", Orange.data.Table) class Outputs: data = Output("Data", Orange.data.Table) want_main_area = False settingsHandler = FeatureConstructorHandler() descriptors = ContextSetting([]) currentIndex = ContextSetting(-1) EDITORS = [(ContinuousDescriptor, ContinuousFeatureEditor), (DateTimeDescriptor, DateTimeFeatureEditor), (DiscreteDescriptor, DiscreteFeatureEditor), (StringDescriptor, StringFeatureEditor)] class Error(OWWidget.Error): more_values_needed = Msg("Categorical feature {} needs more values.") invalid_expressions = Msg("Invalid expressions: {}.") class Warning(OWWidget.Warning): renamed_var = Msg("Recently added variable has been renamed, " "to avoid duplicates.\n") def __init__(self): super().__init__() self.data = None self.editors = {} box = gui.vBox(self.controlArea, "Variable Definitions") toplayout = QHBoxLayout() toplayout.setContentsMargins(0, 0, 0, 0) box.layout().addLayout(toplayout) self.editorstack = QStackedWidget(sizePolicy=QSizePolicy( QSizePolicy.MinimumExpanding, QSizePolicy.MinimumExpanding)) for descclass, editorclass in self.EDITORS: editor = editorclass() editor.featureChanged.connect(self._on_modified) self.editors[descclass] = editor self.editorstack.addWidget(editor) self.editorstack.setEnabled(False) buttonlayout = QVBoxLayout(spacing=10) buttonlayout.setContentsMargins(0, 0, 0, 0) self.addbutton = QPushButton("New", toolTip="Create a new variable", minimumWidth=120, shortcut=QKeySequence.New) def unique_name(fmt, reserved): candidates = (fmt.format(i) for i in count(1)) return next(c for c in candidates if c not in reserved) def generate_newname(fmt): return unique_name(fmt, self.reserved_names()) menu = QMenu(self.addbutton) cont = menu.addAction("Numeric") cont.triggered.connect(lambda: self.addFeature( ContinuousDescriptor(generate_newname("X{}"), "", 3))) disc = menu.addAction("Categorical") disc.triggered.connect(lambda: self.addFeature( DiscreteDescriptor(generate_newname("D{}"), "", (), False))) string = menu.addAction("Text") string.triggered.connect(lambda: self.addFeature( StringDescriptor(generate_newname("S{}"), ""))) datetime = menu.addAction("Date/Time") datetime.triggered.connect(lambda: self.addFeature( DateTimeDescriptor(generate_newname("T{}"), ""))) menu.addSeparator() self.duplicateaction = menu.addAction("Duplicate Selected Variable") self.duplicateaction.triggered.connect(self.duplicateFeature) self.duplicateaction.setEnabled(False) self.addbutton.setMenu(menu) self.removebutton = QPushButton("Remove", toolTip="Remove selected variable", minimumWidth=120, shortcut=QKeySequence.Delete) self.removebutton.clicked.connect(self.removeSelectedFeature) buttonlayout.addWidget(self.addbutton) buttonlayout.addWidget(self.removebutton) buttonlayout.addStretch(10) toplayout.addLayout(buttonlayout, 0) toplayout.addWidget(self.editorstack, 10) # Layout for the list view layout = QVBoxLayout(spacing=1, margin=0) self.featuremodel = DescriptorModel(parent=self) self.featureview = QListView(minimumWidth=200, minimumHeight=50, sizePolicy=QSizePolicy( QSizePolicy.Minimum, QSizePolicy.MinimumExpanding)) self.featureview.setItemDelegate(FeatureItemDelegate(self)) self.featureview.setModel(self.featuremodel) self.featureview.selectionModel().selectionChanged.connect( self._on_selectedVariableChanged) self.info.set_input_summary(self.info.NoInput) self.info.set_output_summary(self.info.NoOutput) layout.addWidget(self.featureview) box.layout().addLayout(layout, 1) gui.button(self.buttonsArea, self, "Send", callback=self.apply, default=True) def setCurrentIndex(self, index): index = min(index, len(self.featuremodel) - 1) self.currentIndex = index if index >= 0: itemmodels.select_row(self.featureview, index) desc = self.featuremodel[min(index, len(self.featuremodel) - 1)] editor = self.editors[type(desc)] self.editorstack.setCurrentWidget(editor) editor.setEditorData(desc, self.data.domain if self.data else None) self.editorstack.setEnabled(index >= 0) self.duplicateaction.setEnabled(index >= 0) self.removebutton.setEnabled(index >= 0) def _on_selectedVariableChanged(self, selected, *_): index = selected_row(self.featureview) if index is not None: self.setCurrentIndex(index) else: self.setCurrentIndex(-1) def _on_modified(self): if self.currentIndex >= 0: self.Warning.clear() editor = self.editorstack.currentWidget() proposed = editor.editorData().name unique = get_unique_names(self.reserved_names(self.currentIndex), proposed) feature = editor.editorData() if editor.editorData().name != unique: self.Warning.renamed_var() feature = feature.__class__(unique, *feature[1:]) self.featuremodel[self.currentIndex] = feature self.descriptors = list(self.featuremodel) def setDescriptors(self, descriptors): """ Set a list of variable descriptors to edit. """ self.descriptors = descriptors self.featuremodel[:] = list(self.descriptors) def reserved_names(self, idx_=None): varnames = [] if self.data is not None: varnames = [ var.name for var in self.data.domain.variables + self.data.domain.metas ] varnames += [ desc.name for idx, desc in enumerate(self.featuremodel) if idx != idx_ ] return set(varnames) @Inputs.data @check_sql_input def setData(self, data=None): """Set the input dataset.""" self.closeContext() self.data = data self.info.set_input_summary(self.info.NoInput) if self.data is not None: descriptors = list(self.descriptors) currindex = self.currentIndex self.descriptors = [] self.currentIndex = -1 self.openContext(data) self.info.set_input_summary(len(data), format_summary_details(data)) if descriptors != self.descriptors or \ self.currentIndex != currindex: # disconnect from the selection model while reseting the model selmodel = self.featureview.selectionModel() selmodel.selectionChanged.disconnect( self._on_selectedVariableChanged) self.featuremodel[:] = list(self.descriptors) self.setCurrentIndex(self.currentIndex) selmodel.selectionChanged.connect( self._on_selectedVariableChanged) self.editorstack.setEnabled(self.currentIndex >= 0) def handleNewSignals(self): if self.data is not None: self.apply() else: self.info.set_output_summary(self.info.NoOutput) self.Outputs.data.send(None) def addFeature(self, descriptor): self.featuremodel.append(descriptor) self.setCurrentIndex(len(self.featuremodel) - 1) editor = self.editorstack.currentWidget() editor.nameedit.setFocus() editor.nameedit.selectAll() def removeFeature(self, index): del self.featuremodel[index] index = selected_row(self.featureview) if index is not None: self.setCurrentIndex(index) elif index is None and self.featuremodel.rowCount(): # Deleting the last item clears selection self.setCurrentIndex(self.featuremodel.rowCount() - 1) def removeSelectedFeature(self): if self.currentIndex >= 0: self.removeFeature(self.currentIndex) def duplicateFeature(self): desc = self.featuremodel[self.currentIndex] self.addFeature(copy.deepcopy(desc)) @staticmethod def check_attrs_values(attr, data): for i in range(len(data)): for var in attr: if not math.isnan(data[i, var]) \ and int(data[i, var]) >= len(var.values): return var.name return None def _validate_descriptors(self, desc): def validate(source): try: return validate_exp(ast.parse(source, mode="eval")) # ast.parse can return arbitrary errors, not only SyntaxError # pylint: disable=broad-except except Exception: return False final = [] invalid = [] for d in desc: if validate(d.expression): final.append(d) else: final.append(d._replace(expression="")) invalid.append(d) if invalid: self.Error.invalid_expressions(", ".join(s.name for s in invalid)) return final def apply(self): def report_error(err): log = logging.getLogger(__name__) log.error("", exc_info=True) self.error("".join(format_exception_only(type(err), err)).rstrip()) self.Error.clear() if self.data is None: return desc = list(self.featuremodel) desc = self._validate_descriptors(desc) try: new_variables = construct_variables(desc, self.data) # user's expression can contain arbitrary errors except Exception as err: # pylint: disable=broad-except report_error(err) return attrs = [var for var in new_variables if var.is_primitive()] metas = [var for var in new_variables if not var.is_primitive()] new_domain = Orange.data.Domain( self.data.domain.attributes + tuple(attrs), self.data.domain.class_vars, metas=self.data.domain.metas + tuple(metas)) try: for variable in new_variables: variable.compute_value.mask_exceptions = False data = self.data.transform(new_domain) # user's expression can contain arbitrary errors # pylint: disable=broad-except except Exception as err: report_error(err) return finally: for variable in new_variables: variable.compute_value.mask_exceptions = True disc_attrs_not_ok = self.check_attrs_values( [var for var in attrs if var.is_discrete], data) if disc_attrs_not_ok: self.Error.more_values_needed(disc_attrs_not_ok) return self.info.set_output_summary(len(data), format_summary_details(data)) self.Outputs.data.send(data) def send_report(self): items = OrderedDict() for feature in self.featuremodel: if isinstance(feature, DiscreteDescriptor): items[ feature.name] = "{} (categorical with values {}{})".format( feature.expression, feature.values, "; ordered" * feature.ordered) elif isinstance(feature, ContinuousDescriptor): items[feature.name] = "{} (numeric)".format(feature.expression) elif isinstance(feature, DateTimeDescriptor): items[feature.name] = "{} (date/time)".format( feature.expression) else: items[feature.name] = "{} (text)".format(feature.expression) self.report_items(report.plural("Constructed feature{s}", len(items)), items)