Exemplo n.º 1
0
    def find_rules(self):
        """Mine association rules from ``self.data`` and show them in the table.

        The data is one-hot encoded (``self.classify`` is forwarded to the
        encoder), frequent itemsets are enumerated with ``self.minSupport /
        100`` as threshold and rules are generated from every itemset with
        ``self.minConfidence / 100``.  When classifying, only rules whose
        consequent consists solely of class-variable items are kept.  When
        ``self.filterSearch`` is set, the size and regex filters are applied
        while mining.  Mining stops once ``self.maxRules`` rows were added.

        Does nothing when no data is set.  Otherwise the table receives a new
        model and ``self.onehot_mapping`` as well as the ``nRules``,
        ``nFilteredRules``, ``nSelectedRules`` and ``nSelectedExamples``
        counters are updated.  The progress bar is finished even when mining
        raises.
        """
        if self.data is None:
            return
        data = self.data
        self.table.model().clear()

        n_examples = len(data)
        NumericItem = self.NumericItem
        StandardItem = self.StandardItem
        filterSearch = self.filterSearch
        itemsetMin = self.filterAntecedentMin + self.filterConsequentMin
        itemsetMax = self.filterAntecedentMax + self.filterConsequentMax
        isSizeMatch = self.isSizeMatch
        isRegexMatch = self.isRegexMatch

        X, mapping = OneHot.encode(data, self.classify)
        self.onehot_mapping = mapping
        # Decode the mapping once; both lookups below are derived from it.
        decoded = list(OneHot.decode(mapping, data, mapping))
        names = {item: '{}={}'.format(var.name, val)
                 for item, var, val in decoded}

        # Items that consequent must include if classifying
        class_items = {item
                       for item, var, _ in decoded
                       if var is data.domain.class_var} if self.classify else set()
        assert bool(class_items) == bool(self.classify)

        model = QStandardItemModel(self.table)
        headers = [
            ("Supp", "Support"),
            ("Conf", "Confidence (support / antecedent support)"),
            ("Covr", "Coverage (antecedent support / number of examples)"),
            ("Strg", "Strength (consequent support / antecedent support)"),
            ("Lift", "Lift (number of examples * confidence / consequent support)"),
            ("Levr", "Leverage ((support * number of examples - antecedent support * consequent support) / (number of examples)²)"),
            ("Antecedent", None),
            ("", None),
            ("Consequent", None),
        ]
        for col, (label, tooltip) in enumerate(headers):
            item = QStandardItem(label)
            item.setToolTip(tooltip)
            model.setHorizontalHeaderItem(col, item)

        # Find itemsets
        nRules = 0
        itemsets = {}
        progress = gui.ProgressBar(self, self.maxRules + 1)
        try:
            for itemset, support in frequent_itemsets(X, self.minSupport / 100):
                # Every itemset is recorded, even if filtered out below,
                # because rule generation and statistics look supports up here.
                itemsets[itemset] = support

                if class_items and not class_items & itemset:
                    continue

                # Filter itemset by joined filters before descending into it.
                # The joined string is only needed when filtering is active.
                if filterSearch:
                    if not itemsetMin <= len(itemset) <= itemsetMax:
                        continue
                    itemset_str = ' '.join(names[i] for i in itemset)
                    if not isRegexMatch(itemset_str, itemset_str):
                        continue

                for rule in gen_assoc_rules(itemsets,
                                            self.minConfidence / 100,
                                            itemset):
                    (left, right), rule_support, confidence = rule

                    # When classifying, the consequent may hold class items only
                    if class_items and right - class_items:
                        continue
                    if filterSearch and not isSizeMatch(len(left), len(right)):
                        continue
                    left_str = ' '.join(names[i] for i in sorted(left))
                    right_str = ' '.join(names[i] for i in sorted(right))
                    if filterSearch and not isRegexMatch(left_str, right_str):
                        continue

                    # All filters matched, calculate stats and add table row
                    _, _, _, coverage, strength, lift, leverage = next(
                        rules_stats((rule,), itemsets, n_examples))

                    support_item = NumericItem(rule_support / n_examples)
                    # Set row data on first column
                    support_item.setData(
                        (itemset - class_items,
                         class_items and (class_items & itemset).pop()),
                        self.ROW_DATA_ROLE)
                    left_item = StandardItem(left_str, len(left))
                    left_item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    model.appendRow([support_item,
                                     NumericItem(confidence),
                                     NumericItem(coverage),
                                     NumericItem(strength),
                                     NumericItem(lift),
                                     NumericItem(leverage),
                                     left_item,
                                     StandardItem('→'),
                                     StandardItem(right_str, len(right))])
                    nRules += 1
                    progress.advance()
                    if nRules >= self.maxRules:
                        break
                if nRules >= self.maxRules:
                    break

            # Populate the TableView
            table = self.table
            table.setHidden(True)
            table.setSortingEnabled(False)
            proxy_model = self.proxy_model
            proxy_model.setSourceModel(model)
            table.setModel(proxy_model)
            for i in range(model.columnCount()):
                table.resizeColumnToContents(i)
            table.setSortingEnabled(True)
            table.setHidden(False)
        finally:
            # Always close the progress bar, also when mining failed.
            progress.finish()

        self.nRules = nRules
        self.nFilteredRules = proxy_model.rowCount()  # TODO: continue; also add in owitemsets
        self.nSelectedRules = 0
        self.nSelectedExamples = 0
Exemplo n.º 2
0
    def find_rules(self):
        """Mine association rules from ``self.data`` and show them in the table.

        The data is one-hot encoded (``self.classify`` is forwarded to the
        encoder), frequent itemsets are enumerated with ``self.minSupport /
        100`` as threshold and rules are generated from every itemset with
        ``self.minConfidence / 100``.  When classifying, only rules whose
        consequent consists solely of class-variable items are kept.  When
        ``self.filterSearch`` is set, the size and regex filters are applied
        while mining.  Mining stops once ``self.maxRules`` rows were added.

        Returns immediately when there is no (or empty) data or when a search
        is already running.  If the encoder yields no matrix, error 911 is
        shown and an empty table is produced instead of mining on ``None``.
        ``self._is_running`` is released even when mining raises.
        """
        if self.data is None or not len(self.data):
            return
        if self._is_running:
            return
        self._is_running = True
        try:
            data = self.data
            self.table.model().clear()

            n_examples = len(data)
            NumericItem = self.NumericItem
            StandardItem = self.StandardItem
            filterSearch = self.filterSearch
            itemsetMin = self.filterAntecedentMin + self.filterConsequentMin
            itemsetMax = self.filterAntecedentMax + self.filterConsequentMax
            isSizeMatch = self.isSizeMatch
            isRegexMatch = self.isRegexMatch

            X, mapping = OneHot.encode(data, self.classify)
            self.error(911)
            self.onehot_mapping = mapping
            if X is None:
                # Nothing to mine: report it and continue with empty lookups
                # so that an empty table and zeroed counters are shown below.
                self.error(911, 'Need some discrete data to work with.')
                names, class_items = {}, set()
            else:
                ITEM_FMT = '{}' if issparse(data.X) else '{}={}'
                # Decode the mapping once; both lookups are derived from it.
                decoded = list(OneHot.decode(mapping, data, mapping))
                names = {
                    item: ('{}={}' if var is data.domain.class_var
                           else ITEM_FMT).format(var.name, val)
                    for item, var, val in decoded
                }
                # Items that consequent must include if classifying
                class_items = {
                    item
                    for item, var, _ in decoded
                    if var is data.domain.class_var
                } if self.classify else set()
                assert bool(class_items) == bool(self.classify)

            model = QStandardItemModel(self.table)
            headers = [
                ("Supp", "Support"),
                ("Conf", "Confidence (support / antecedent support)"),
                ("Covr", "Coverage (antecedent support / number of examples)"),
                ("Strg", "Strength (consequent support / antecedent support)"),
                ("Lift",
                 "Lift (number of examples * confidence / consequent support)"),
                ("Levr",
                 "Leverage ((support * number of examples - antecedent support * consequent support) / (number of examples)²)"),
                ("Antecedent", None),
                ("", None),
                ("Consequent", None),
            ]
            for col, (label, tooltip) in enumerate(headers):
                item = QStandardItem(label)
                item.setToolTip(tooltip)
                model.setHorizontalHeaderItem(col, item)

            # Find itemsets
            nRules = 0
            itemsets = {}
            with self.progressBar(self.maxRules + 1) as progress:
                found = (frequent_itemsets(X, self.minSupport / 100)
                         if X is not None else ())
                for itemset, support in found:
                    # Every itemset is recorded, even if filtered out below,
                    # because rule generation and statistics look supports
                    # up in this dict.
                    itemsets[itemset] = support

                    if class_items and not class_items & itemset:
                        continue

                    # Filter itemset by joined filters before descending
                    # into it
                    itemset_str = ' '.join(names[i] for i in itemset)
                    if (filterSearch and
                        (len(itemset) < itemsetMin or
                         itemsetMax < len(itemset) or
                         not isRegexMatch(itemset_str, itemset_str))):
                        continue

                    for rule in association_rules(itemsets,
                                                  self.minConfidence / 100,
                                                  itemset):
                        left, right, rule_support, confidence = rule

                        # When classifying, the consequent may hold class
                        # items only
                        if class_items and right - class_items:
                            continue
                        if (filterSearch and
                                not isSizeMatch(len(left), len(right))):
                            continue
                        left_str = ', '.join(names[i] for i in sorted(left))
                        right_str = ', '.join(names[i] for i in sorted(right))
                        if (filterSearch and
                                not isRegexMatch(left_str, right_str)):
                            continue

                        # All filters matched, calculate stats and add
                        # table row
                        _, _, _, _, coverage, strength, lift, leverage = next(
                            rules_stats((rule, ), itemsets, n_examples))

                        support_item = NumericItem(rule_support / n_examples)
                        # Set row data on first column
                        support_item.setData(
                            (itemset - class_items,
                             class_items and (class_items & itemset).pop()),
                            self.ROW_DATA_ROLE)
                        left_item = StandardItem(left_str, len(left))
                        left_item.setTextAlignment(
                            Qt.AlignRight | Qt.AlignVCenter)
                        model.appendRow([
                            support_item,
                            NumericItem(confidence),
                            NumericItem(coverage),
                            NumericItem(strength),
                            NumericItem(lift),
                            NumericItem(leverage),
                            left_item,
                            StandardItem('→'),
                            StandardItem(right_str, len(right)),
                        ])
                        nRules += 1
                        progress.advance()
                        if nRules >= self.maxRules:
                            break
                    if nRules >= self.maxRules:
                        break

            # Populate the TableView
            table = self.table
            table.setHidden(True)
            table.setSortingEnabled(False)
            proxy_model = self.proxy_model
            proxy_model.setSourceModel(model)
            table.setModel(proxy_model)
            for i in range(model.columnCount()):
                table.resizeColumnToContents(i)
            table.setSortingEnabled(True)
            table.setHidden(False)

            self.nRules = nRules
            self.nFilteredRules = proxy_model.rowCount(
            )  # TODO: continue; also add in owitemsets
            self.nSelectedRules = 0
            self.nSelectedExamples = 0
        finally:
            # Release the re-entrancy guard on every exit path; otherwise a
            # single failure would block all later searches.
            self._is_running = False
Exemplo n.º 3
0
    def find_itemsets(self):
        """Enumerate frequent itemsets of ``self.data`` and fill ``self.tree``.

        The data is one-hot encoded and frequent itemsets are enumerated with
        ``self.minSupport / 100`` as threshold.  Every itemset is inserted as
        a path of its sorted items, sharing already existing prefixes; a new
        node shows the item name, the support and the support as a percentage
        of ``len(data)``.  When ``self.filterSearch`` is set, itemsets whose
        size is outside ``filterMinItems``..``filterMaxItems`` are skipped and
        itemsets none of whose item names pass ``self.isRegexMatch`` get their
        newly created nodes removed again.  Enumeration stops once
        ``self.maxItemsets`` itemsets were counted.

        Returns immediately when no data is set or a search is already
        running.  ``self._is_running`` and the tree's update/signal state are
        restored on every exit path, including the early abort on a deleted
        tree item and unexpected exceptions.
        """
        if self.data is None:
            return
        if self._is_running:
            return
        self._is_running = True

        try:
            data = self.data
            self.tree.clear()
            self.tree.setUpdatesEnabled(False)
            self.tree.blockSignals(True)

            class ItemDict(dict):
                """Maps child item names to ``ItemDict``; ``item`` is the tree node."""

                def __init__(self, item):
                    self.item = item

            top = ItemDict(self.tree.invisibleRootItem())
            X, mapping = OneHot.encode(data)
            self.onehot_mapping = mapping
            ITEM_FMT = '{}' if issparse(data.X) else '{}={}'
            names = {
                item: ITEM_FMT.format(var.name, val)
                for item, var, val in OneHot.decode(mapping.keys(), data,
                                                    mapping)
            }
            nItemsets = 0

            filterSearch = self.filterSearch
            filterMinItems = self.filterMinItems
            filterMaxItems = self.filterMaxItems
            isRegexMatch = self.isRegexMatch

            # Find itemsets and populate the TreeView
            with self.progressBar(self.maxItemsets + 1) as progress:
                for itemset, support in frequent_itemsets(
                        X, self.minSupport / 100):

                    if filterSearch and not (
                            filterMinItems <= len(itemset) <= filterMaxItems):
                        continue

                    parent = top
                    first_new_item = None
                    itemset_matches_filter = False

                    for item in sorted(itemset):
                        name = names[item]

                        if filterSearch and not itemset_matches_filter:
                            itemset_matches_filter = isRegexMatch(name)

                        child = parent.get(name)
                        if child is None:
                            try:
                                wi = self.TreeWidgetItem(parent.item, [
                                    name,
                                    str(support),
                                    '{:.4g}'.format(100 * support / len(data)),
                                ])
                            except RuntimeError:
                                # FIXME: When autoFind was in effect and the
                                # support slider was moved, this line excepted
                                # with:
                                #     RuntimeError: wrapped C/C++ object of
                                #                   type TreeWidgetItem has
                                #                   been deleted
                                # The ``finally`` below still unlocks the
                                # widget on this path.
                                return
                            wi.setData(0, self.ITEM_DATA_ROLE, item)
                            child = parent[name] = ItemDict(wi)

                            if first_new_item is None:
                                first_new_item = (parent, name)
                        parent = child

                    if filterSearch and not itemset_matches_filter:
                        # Drop the nodes created for this rejected itemset.
                        # If its whole path already existed, nothing was
                        # created and there is nothing to remove.
                        if first_new_item is not None:
                            parent, name = first_new_item
                            parent.item.removeChild(parent[name].item)
                            del parent[name].item
                            del parent[name]
                    else:
                        nItemsets += 1
                        progress.advance()
                    if nItemsets >= self.maxItemsets:
                        break

            if not filterSearch:
                self.filter_change()
            self.nItemsets = nItemsets
            self.nSelectedItemsets = 0
            self.nSelectedExamples = 0
            self.tree.expandAll()
            for i in range(self.tree.columnCount()):
                self.tree.resizeColumnToContents(i)
        finally:
            # Undo the locks taken above on every exit path.
            self.tree.setUpdatesEnabled(True)
            self.tree.blockSignals(False)
            self._is_running = False
Exemplo n.º 4
0
    def find_itemsets(self):
        """Enumerate frequent itemsets of ``self.data`` and fill ``self.tree``.

        The data is one-hot encoded and frequent itemsets are enumerated with
        ``self.minSupport / 100`` as threshold.  Every itemset is inserted as
        a path of its sorted items, sharing already existing prefixes; a new
        node shows the item name, the support and the support as a percentage
        of ``len(data)``.  When ``self.filterSearch`` is set, itemsets whose
        size is outside ``filterMinItems``..``filterMaxItems`` are skipped and
        itemsets none of whose item names pass ``self.isRegexMatch`` get their
        newly created nodes removed again.  Enumeration stops once
        ``self.maxItemsets`` itemsets were counted.

        Does nothing when no data is set.  The tree's update/signal state is
        restored and the progress bar finished even when enumeration raises.
        """
        if self.data is None:
            return
        data = self.data
        self.tree.clear()
        self.tree.setUpdatesEnabled(False)
        self.tree.blockSignals(True)
        progress = None

        try:
            class ItemDict(dict):
                """Maps child item names to ``ItemDict``; ``item`` is the tree node."""

                def __init__(self, item):
                    self.item = item

            top = ItemDict(self.tree.invisibleRootItem())
            X, mapping = OneHot.encode(data)
            self.onehot_mapping = mapping
            names = {item: '{}={}'.format(var.name, val)
                     for item, var, val in OneHot.decode(mapping.keys(), data, mapping)}
            nItemsets = 0

            filterSearch = self.filterSearch
            filterMinItems, filterMaxItems = self.filterMinItems, self.filterMaxItems
            isRegexMatch = self.isRegexMatch

            # Find itemsets and populate the TreeView
            progress = gui.ProgressBar(self, self.maxItemsets + 1)
            for itemset, support in frequent_itemsets(X, self.minSupport / 100):

                if filterSearch and not filterMinItems <= len(itemset) <= filterMaxItems:
                    continue

                parent = top
                first_new_item = None
                itemset_matches_filter = False

                for item in sorted(itemset):
                    name = names[item]

                    if filterSearch and not itemset_matches_filter:
                        itemset_matches_filter = isRegexMatch(name)

                    child = parent.get(name)
                    if child is None:
                        wi = self.TreeWidgetItem(
                            parent.item,
                            [name, str(support), '{:.1f}'.format(100 * support / len(data))])
                        wi.setData(0, self.ITEM_DATA_ROLE, item)
                        child = parent[name] = ItemDict(wi)

                        if first_new_item is None:
                            first_new_item = (parent, name)
                    parent = child

                if filterSearch and not itemset_matches_filter:
                    # Drop the nodes created for this rejected itemset.  If
                    # its whole path already existed, nothing was created
                    # and there is nothing to remove.
                    if first_new_item is not None:
                        parent, name = first_new_item
                        parent.item.removeChild(parent[name].item)
                        del parent[name].item
                        del parent[name]
                else:
                    nItemsets += 1
                    progress.advance()
                if nItemsets >= self.maxItemsets:
                    break

            if not filterSearch:
                self.filter_change()
            self.nItemsets = nItemsets
            self.nSelectedItemsets = 0
            self.nSelectedExamples = 0
            self.tree.expandAll()
            for i in range(self.tree.columnCount()):
                self.tree.resizeColumnToContents(i)
        finally:
            # Undo the tree locks and close the progress bar on every exit
            # path so that a failure does not leave the widget frozen.
            self.tree.setUpdatesEnabled(True)
            self.tree.blockSignals(False)
            if progress is not None:
                progress.finish()
Exemplo n.º 5
0
    def find_itemsets(self):
        """Enumerate frequent itemsets of ``self.data`` and fill ``self.tree``.

        The data is one-hot encoded and frequent itemsets are enumerated with
        ``self.minSupport / 100`` as threshold.  Every itemset is inserted as
        a path of its sorted items, sharing already existing prefixes; a new
        node shows the item name, the support and the support as a percentage
        of ``len(data)``.  When ``self.filterSearch`` is set, itemsets whose
        size is outside ``filterMinItems``..``filterMaxItems`` are skipped and
        itemsets none of whose item names pass ``self.isRegexMatch`` get their
        newly created nodes removed again.  Enumeration stops once
        ``self.maxItemsets`` itemsets were counted.

        Returns immediately when no data is set or a search is already
        running.  ``self._is_running`` and the tree's update/signal state are
        restored on every exit path, including the early abort on a deleted
        tree item and unexpected exceptions.
        """
        if self.data is None:
            return
        if self._is_running:
            return
        self._is_running = True

        try:
            data = self.data
            self.tree.clear()
            self.tree.setUpdatesEnabled(False)
            self.tree.blockSignals(True)

            class ItemDict(dict):
                """Maps child item names to ``ItemDict``; ``item`` is the tree node."""

                def __init__(self, item):
                    self.item = item

            top = ItemDict(self.tree.invisibleRootItem())
            X, mapping = OneHot.encode(data)
            self.onehot_mapping = mapping
            ITEM_FMT = "{}" if issparse(data.X) else "{}={}"
            names = {
                item: ITEM_FMT.format(var.name, val)
                for item, var, val in OneHot.decode(mapping.keys(), data, mapping)
            }
            nItemsets = 0

            filterSearch = self.filterSearch
            filterMinItems, filterMaxItems = self.filterMinItems, self.filterMaxItems
            isRegexMatch = self.isRegexMatch

            # Find itemsets and populate the TreeView
            with self.progressBar(self.maxItemsets + 1) as progress:
                for itemset, support in frequent_itemsets(X, self.minSupport / 100):

                    if filterSearch and not filterMinItems <= len(itemset) <= filterMaxItems:
                        continue

                    parent = top
                    first_new_item = None
                    itemset_matches_filter = False

                    for item in sorted(itemset):
                        name = names[item]

                        if filterSearch and not itemset_matches_filter:
                            itemset_matches_filter = isRegexMatch(name)

                        child = parent.get(name)
                        if child is None:
                            try:
                                wi = self.TreeWidgetItem(
                                    parent.item,
                                    [name, str(support), "{:.4g}".format(100 * support / len(data))],
                                )
                            except RuntimeError:
                                # FIXME: When autoFind was in effect and the support
                                # slider was moved, this line excepted with:
                                #     RuntimeError: wrapped C/C++ object of type
                                #                   TreeWidgetItem has been deleted
                                # The ``finally`` below still unlocks the widget
                                # on this path.
                                return
                            wi.setData(0, self.ITEM_DATA_ROLE, item)
                            child = parent[name] = ItemDict(wi)

                            if first_new_item is None:
                                first_new_item = (parent, name)
                        parent = child

                    if filterSearch and not itemset_matches_filter:
                        # Remove what was created for this rejected itemset;
                        # when its whole path pre-existed there is nothing
                        # to remove and unpacking ``None`` must be avoided.
                        if first_new_item is not None:
                            parent, name = first_new_item
                            parent.item.removeChild(parent[name].item)
                            del parent[name].item
                            del parent[name]
                    else:
                        nItemsets += 1
                        progress.advance()
                    if nItemsets >= self.maxItemsets:
                        break

            if not filterSearch:
                self.filter_change()
            self.nItemsets = nItemsets
            self.nSelectedItemsets = 0
            self.nSelectedExamples = 0
            self.tree.expandAll()
            for i in range(self.tree.columnCount()):
                self.tree.resizeColumnToContents(i)
        finally:
            # Undo the locks taken above on every exit path.
            self.tree.setUpdatesEnabled(True)
            self.tree.blockSignals(False)
            self._is_running = False
Exemplo n.º 6
0
    def find_rules(self):
        """Mine association rules, show them in the table and send them on.

        The data is one-hot encoded (``self.classify`` is forwarded to the
        encoder), frequent itemsets are enumerated with ``self.minSupport /
        100`` as threshold and rules are generated from every itemset with
        ``self.minConfidence / 100``.  When classifying, only rules whose
        consequent consists solely of class-variable items are kept.  When
        ``self.filterSearch`` is set, the size and regex filters are applied
        while mining.  Mining stops once ``self.maxRules`` rows were added.

        Returns immediately when there is no (or empty) data.  A call made
        while a search is running only clears ``self._is_running``; the
        running loop checks that flag after ``qApp.processEvents()`` and
        stops, so the call acts as a cancel request.  If the encoder yields
        no matrix, ``Error.need_discrete_data`` is shown and an empty table
        is produced instead of mining on ``None``.  The button caption and
        ``self._is_running`` are restored even when mining raises.
        """
        if self.data is None or not len(self.data):
            return
        if self._is_running:
            # Signal the search that is in progress to stop.
            self._is_running = False
            return

        self.button.button.setText('Cancel')

        self._is_running = True
        try:
            data = self.data
            self.table.model().clear()

            n_examples = len(data)
            NumericItem = self.NumericItem
            StandardItem = self.StandardItem
            filterSearch = self.filterSearch
            itemsetMin = self.filterAntecedentMin + self.filterConsequentMin
            itemsetMax = self.filterAntecedentMax + self.filterConsequentMax
            isSizeMatch = self.isSizeMatch
            isRegexMatch = self.isRegexMatch

            X, mapping = OneHot.encode(data, self.classify)
            self.Error.need_discrete_data.clear()
            self.onehot_mapping = mapping
            if X is None:
                # Nothing to mine: report it and continue with empty lookups
                # so that an empty table and zeroed counters are shown below.
                self.Error.need_discrete_data()
                names, class_items = {}, set()
            else:
                ITEM_FMT = '{}' if issparse(data.X) else '{}={}'
                # Decode the mapping once; both lookups are derived from it.
                decoded = list(OneHot.decode(mapping, data, mapping))
                names = {item: ('{}={}' if var is data.domain.class_var
                                else ITEM_FMT).format(var.name, val)
                         for item, var, val in decoded}
                # Items that consequent must include if classifying
                class_items = {item
                               for item, var, _ in decoded
                               if var is data.domain.class_var} if self.classify else set()
                assert bool(class_items) == bool(self.classify)

            model = QStandardItemModel(self.table)
            for col, (label, _, tooltip) in enumerate(self.header):
                item = QStandardItem(label)
                item.setToolTip(tooltip)
                model.setHorizontalHeaderItem(col, item)

            # Find itemsets
            nRules = 0
            itemsets = {}
            ARROW_ITEM = StandardItem('→')
            ARROW_ITEM.setTextAlignment(Qt.AlignCenter)
            with self.progressBar(self.maxRules + 1) as progress:
                found = (frequent_itemsets(X, self.minSupport / 100)
                         if X is not None else ())
                for itemset, support in found:
                    # Every itemset is recorded, even if filtered out below,
                    # because rule generation and statistics look supports
                    # up in this dict.
                    itemsets[itemset] = support

                    if class_items and not class_items & itemset:
                        continue

                    # Filter itemset by joined filters before descending
                    # into it
                    itemset_str = ' '.join(names[i] for i in itemset)
                    if (filterSearch and
                        (len(itemset) < itemsetMin or
                         itemsetMax < len(itemset) or
                         not isRegexMatch(itemset_str, itemset_str))):
                        continue

                    for rule in association_rules(itemsets,
                                                  self.minConfidence / 100,
                                                  itemset):
                        left, right, rule_support, confidence = rule

                        # When classifying, the consequent may hold class
                        # items only
                        if class_items and right - class_items:
                            continue
                        if (filterSearch and
                                not isSizeMatch(len(left), len(right))):
                            continue
                        left_str = ', '.join(names[i] for i in sorted(left))
                        right_str = ', '.join(names[i] for i in sorted(right))
                        if (filterSearch and
                                not isRegexMatch(left_str, right_str)):
                            continue

                        # All filters matched, calculate stats and add
                        # table row
                        _, _, _, _, coverage, strength, lift, leverage = next(
                            rules_stats((rule,), itemsets, n_examples))

                        support_item = NumericItem(rule_support / n_examples)
                        # Set row data on first column
                        support_item.setData(
                            (itemset - class_items,
                             class_items and (class_items & itemset).pop()),
                            self.ROW_DATA_ROLE)
                        left_item = StandardItem(left_str, len(left))
                        left_item.setTextAlignment(
                            Qt.AlignRight | Qt.AlignVCenter)
                        model.appendRow([support_item,
                                         NumericItem(confidence),
                                         NumericItem(coverage),
                                         NumericItem(strength),
                                         NumericItem(lift),
                                         NumericItem(leverage),
                                         left_item,
                                         ARROW_ITEM.clone(),
                                         StandardItem(right_str, len(right))])
                        nRules += 1
                        progress.advance()

                        if not self._is_running or nRules >= self.maxRules:
                            break

                    # Let pending events run; a re-entrant call of this
                    # method clears ``_is_running`` to request cancellation.
                    qApp.processEvents()

                    if not self._is_running or nRules >= self.maxRules:
                        break

            # Populate the TableView
            table = self.table
            table.setHidden(True)
            table.setSortingEnabled(False)
            proxy_model = self.proxy_model
            proxy_model.setSourceModel(model)
            table.setModel(proxy_model)
            for i in range(model.columnCount()):
                table.resizeColumnToContents(i)
            table.setSortingEnabled(True)
            table.setHidden(False)
            self.table_rules = proxy_model.get_data()
            if self.table_rules is not None:
                self.Outputs.rules.send(self.table_rules)

            self.nRules = nRules
            self.nFilteredRules = proxy_model.rowCount()  # TODO: continue; also add in owitemsets
            self.nSelectedRules = 0
            self.nSelectedExamples = 0
        finally:
            # Restore the idle state on every exit path; otherwise a failure
            # would leave the button on 'Cancel' and turn the next click into
            # a cancel request for a search that no longer runs.
            self.button.button.setText('Find Rules')
            self._is_running = False