def getMaxGeneralizationTargetShare(data, subgroup, weightingAttribute=None):
    """Return the largest target share among generalizations of ``subgroup``.

    Each selector combination yielded by ``ut.powerset`` over the subgroup's
    selectors is turned into its own subgroup on the same target.  Its target
    share is the number of positives divided by the number of covered
    instances, both taken from ``get_base_statistics``.

    :param data: dataset handed to ``get_base_statistics``.
    :param subgroup: subgroup whose selectors are generalized; its ``target``
        is reused for every generalization.
    :param weightingAttribute: optional weighting attribute, passed through
        unchanged to ``get_base_statistics``.
    :return: the maximum target share found, or ``0`` when no generalization
        covers any instance.
    """
    selectors = subgroup.subgroupDescription.selectors
    maxTargetShare = 0
    for sels in ut.powerset(selectors):
        generalization = Subgroup(subgroup.target,
                                  SubgroupDescription(list(sels)))
        (_, _, instancesSubgroup,
         positivesSubgroup) = generalization.get_base_statistics(
             data, weightingAttribute)
        # A generalization without covered instances has no defined target
        # share; skipping it avoids a division by zero.
        if instancesSubgroup == 0:
            continue
        maxTargetShare = max(maxTargetShare,
                             positivesSubgroup / instancesSubgroup)
    return maxTargetShare
Пример #2
0
    def execute(self, task):
        """Search conjunctions of selectors from ``task`` in best-first order.

        Candidate descriptions live in a heap keyed by the negated optimistic
        estimate of the description they were refined from, so the candidate
        with the highest inherited estimate is evaluated first.  The loop
        ends when the heap is empty or when the best remaining priority is
        below ``ut.minimumRequiredQuality(result, task)``.

        :param task: search task; this method reads its ``qf``,
            ``searchSpace``, ``target``, ``data`` and ``depth`` attributes.
        :return: the list filled through ``ut.addIfRequired``, sorted in
            descending order of the first element of each entry.
        """
        result = []
        queue = []
        # Quality functions exposing the statistics-based API are fed base
        # statistics that are computed once per candidate.
        uses_statistics = hasattr(task.qf, 'optimisticEstimateFromStatistics')

        # Seed the queue with every single-selector description.  The
        # priority -inf becomes +inf after negation, so seeds are never cut
        # off by the quality threshold below.
        queue.extend((float("-inf"), [sel]) for sel in task.searchSpace)

        while queue:
            negated_priority, description = heappop(queue)
            priority = -negated_priority
            if priority < ut.minimumRequiredQuality(result, task):
                break

            sg = Subgroup(task.target, description)

            if uses_statistics:
                stats = sg.get_base_statistics(task.data)
                quality = task.qf.evaluateFromStatistics(*stats)
                ut.addIfRequired(result, sg, quality, task)
                if isinstance(task.qf, m.BoundedInterestingnessMeasure):
                    bound = task.qf.optimisticEstimateFromStatistics(*stats)
                else:
                    bound = float("inf")
            else:
                quality = task.qf.evaluateFromDataset(task.data, sg)
                ut.addIfRequired(result, sg, quality, task)
                if isinstance(task.qf, m.BoundedInterestingnessMeasure):
                    bound = task.qf.optimisticEstimateFromDataset(
                        task.data, sg)
                else:
                    bound = float("inf")

            # Refine only while the depth limit allows a longer description
            # and the optimistic estimate can still reach the threshold.
            if not (len(description) < task.depth):
                continue
            if not (bound >= ut.minimumRequiredQuality(result, task)):
                continue

            # Extend only with selectors that come after the last selector
            # of this description in the initial search-space order.
            last_position = task.searchSpace.index(description[-1])
            last_position = min(last_position, len(task.searchSpace) - 1)

            child_priority = -bound
            for sel in islice(task.searchSpace, last_position + 1, None):
                heappush(queue, (child_priority, description + [sel]))

        result.sort(key=lambda entry: entry[0], reverse=True)
        return result