Exemplo n.º 1
0
    def __init__(self, data, tree=None):
        """Create an empty classifier bound to a cluster tree.

        data -- passed to ClusterTree(...) when no tree is supplied.
        tree -- optional ready-made tree; stored as-is when it is not None.
        """
        # Working tree: reuse the caller's, otherwise build one from data.
        self.__tree = ClusterTree(data) if tree is None else tree

        # No examples recorded and nothing selected or scored yet.
        self.__examples = list()
        self.__selection = set()
        self.__clusters = None
        self.__score = None

        # Default weight triple.
        self.__weights = [-1.0, 1.0, 0.667]

        # Current and previous "constant" flag / counter pairs.
        self.__constant = False
        self.__constantCount = 0
        self.__oldconstant = False
        self.__oldconscount = 0
Exemplo n.º 2
0
    def __init__(self, data, tree=None):
        """Create an empty positive/negative classifier bound to a cluster tree.

        data -- passed to ClusterTree(...) when no tree is supplied.
        tree -- optional ready-made tree; stored as-is when it is not None.
        """
        # Items selected for the positive and the negative pole.
        self.__pselection = set()
        self.__nselection = set()

        # All recorded examples, plus the per-pole lists.
        self.__examples = []
        self.__positives = []
        self.__negatives = []

        if tree is None:
            self.__tree = ClusterTree(data)
        else:
            self.__tree = tree
        self.__pclusters = None
        self.__nclusters = None

        self.__weights = [-1.0, 1.0, 0.667]
        self.__score = None
        # Per-pole scores, initialised so they can be read before the first
        # example is added.
        # NOTE(review): assumes the enclosing class reads these through
        # get_pscore()/get_nscore() -- confirm against the class body.
        self.__pscore = None
        self.__nscore = None
Exemplo n.º 3
0
    def __init__(self, data, tree=None):
        """Create an empty classifier bound to a cluster tree.

        data -- passed to ClusterTree(...) when no tree is supplied.
        tree -- optional ready-made tree; stored as-is when it is not None.
        """
        # Working tree: reuse the caller's, otherwise build one from data.
        self.__tree = ClusterTree(data) if tree is None else tree

        # No examples recorded and nothing selected or scored yet.
        self.__examples = list()
        self.__selection = set()
        self.__clusters = None
        self.__score = None

        # Default weight triple.
        self.__weights = [-1.0, 1.0, 0.667]

        # Current and previous "constant" flag / counter pairs.
        self.__constant = False
        self.__constantCount = 0
        self.__oldconstant = False
        self.__oldconscount = 0
Exemplo n.º 4
0
    def __init__(self, data, tree=None):
        """Create an empty positive/negative classifier bound to a cluster tree.

        data -- passed to ClusterTree(...) when no tree is supplied.
        tree -- optional ready-made tree; stored as-is when it is not None.
        """
        # Items selected for the positive and the negative pole.
        self.__pselection = set()
        self.__nselection = set()

        # All recorded examples, plus the per-pole lists.
        self.__examples = []
        self.__positives = []
        self.__negatives = []

        if tree is None:
            self.__tree = ClusterTree(data)
        else:
            self.__tree = tree
        self.__pclusters = None
        self.__nclusters = None

        self.__weights = [-1.0, 1.0, 0.667]
        self.__score = None
        # Per-pole scores, initialised so they can be read before the first
        # example is added.
        # NOTE(review): assumes the enclosing class reads these through
        # get_pscore()/get_nscore() -- confirm against the class body.
        self.__pscore = None
        self.__nscore = None
Exemplo n.º 5
0
class PNClassifier:
    """Select positive and negative clusters of a cluster tree from examples.

    Examples are indexable pairs: example[0] is the item handed to the tree,
    example[1] is a label (> 0 means positive, anything else negative).
    Every add/pop re-clusters the tree for both poles and caches the chosen
    clusters, the items below them, and one aggregate score per pole.

    The tree must provide: leaves, clusters, root, translate(item),
    trace(root, cluster) and get_children(cluster). Tree clusters must
    provide: id, parent, range, count and data.
    """

    def __init__(self, data, tree=None):
        """Create an empty classifier.

        data -- passed to ClusterTree(...) when no tree is supplied.
        tree -- optional ready-made tree; stored as-is when it is not None.
        """
        self.__pselection = set()
        self.__nselection = set()

        self.__examples = []
        self.__positives = []
        self.__negatives = []

        if tree is None:
            self.__tree = ClusterTree(data)
        else:
            self.__tree = tree
        self.__pclusters = None
        self.__nclusters = None

        self.__weights = [-1.0, 1.0, 0.667]
        # Initialise the per-pole scores so get_pscore()/get_nscore() work
        # before the first example is added (previously AttributeError).
        self.__pscore = None
        self.__nscore = None

    # ------------------------------------------------------------------
    # ACCESSORS
    # ------------------------------------------------------------------
    def get_tree(self):
        """Return the underlying tree (the live object, not a copy)."""
        # TODO: should be copied
        return self.__tree

    def get_pselection(self):
        """Return the positive selection as a new list."""
        return list(self.__pselection)

    def get_nselection(self):
        """Return the negative selection as a new list."""
        return list(self.__nselection)

    def get_pclusters(self):
        """Return the positive clusters as a new list ([] before any clustering)."""
        if self.__pclusters is None:
            return []
        return list(self.__pclusters)

    def get_nclusters(self):
        """Return the negative clusters as a new list ([] before any clustering)."""
        if self.__nclusters is None:
            return []
        return list(self.__nclusters)

    def get_examples(self):
        """Return all recorded examples as a new list."""
        return list(self.__examples)

    def set_weights(self, weights):
        """Replace the scoring weights with a copy of *weights* (three values are read)."""
        self.__weights = list(weights)

    def get_pscore(self):
        """Return the aggregate positive score, or None when there is none."""
        return self.__pscore

    def get_nscore(self):
        """Return the aggregate negative score, or None when there is none."""
        return self.__nscore

    # ------------------------------------------------------------------
    # ACT
    # ------------------------------------------------------------------
    def add_example(self, example):
        """Record *example* (an (item, label) pair) and recompute both poles."""
        self.__examples.append(example)
        if example[1] > 0:
            self.__positives.append(example[0])
        else:
            self.__negatives.append(example[0])
        self.__recluster()
        self.__reselect()

    def pop_example(self):
        """Remove the most recent example and recompute both poles.

        Raises IndexError when no example has been recorded.
        """
        last = self.__examples.pop()
        if last[1] > 0:
            self.__positives.pop()
        else:
            self.__negatives.pop()
        self.__recluster()
        self.__reselect()

    def reset(self):
        """Forget all examples and results and restore the default weights."""
        self.__pselection = set()
        self.__nselection = set()

        self.__examples = []
        self.__positives = []
        self.__negatives = []

        self.__pclusters = None
        self.__nclusters = None

        self.__weights = [-1.0, 1.0, 0.667]
        self.__pscore = None
        self.__nscore = None

    def __recluster(self):
        """Recompute the clusters and the aggregate score of both poles."""
        tree = self.__tree
        self.__pclusters, pscores = self.__climb(tree, 1)
        self.__nclusters, nscores = self.__climb(tree, -1)
        self.__pscore = self.__aggregate_score(pscores, 1)
        self.__nscore = self.__aggregate_score(nscores, -1)

    def __reselect(self):
        """Rebuild both selections from the children of the current clusters."""
        self.__pselection = set()
        for cluster in self.__pclusters:
            self.__pselection.update(self.__tree.get_children(cluster))
        self.__nselection = set()
        for cluster in self.__nclusters:
            self.__nselection.update(self.__tree.get_children(cluster))

    def __climb(self, tree, pole):
        """Dispatch to the positive (pole > 0) or the negative climb."""
        if pole > 0:
            return self.__pos_climb(tree)
        else:
            return self.__neg_climb(tree)

    def __pos_climb(self, tree):
        """Climb from the positive leaves and collect the best-scoring clusters.

        Returns (clusters, scores), both ordered by cluster id. Temporary
        attributes (ecount, ccount, max_score, collect) are attached to the
        visited tree clusters and removed again before returning.
        """
        import sys  # local import: only needed for the trace line below

        def climb_condition(cluster):
            # A cluster may be processed once every example below it has
            # been accounted for.
            return cluster is not None and cluster.ccount == cluster.ecount

        def merge(cluster):
            # The parent was completed by this child after an earlier child
            # had already reached it: join both collections and weight the
            # two best scores by their example counts.
            parent = cluster.parent
            parent.collect.update(cluster.collect)
            wavg_prev = parent.max_score * (parent.ecount - cluster.ecount)
            wavg_new = cluster.max_score * cluster.ecount
            parent.max_score = (wavg_prev + wavg_new) / parent.ecount
            # The earlier climb registered the parent as its stopping point.
            top.remove(parent.id)

        # Fix: translate() is given bare items, as in the negative climb;
        # the original passed the (item, label) pairs from self.__examples.
        # NOTE(review): confirm that only positives should drive this climb.
        examples = self.__positives

        clusters = [tree.leaves[tree.translate(example)] for example in examples]
        self.__augment_counts(tree, examples)
        # remove duplicate clusters and sort
        clusters = list(set(clusters))
        clusters.sort(key=lambda cluster: cluster.data[0])
        for cluster in clusters:
            cluster.ccount = cluster.ecount

        # climb tree and collect best clusters
        top = set()
        for cluster in clusters:
            while climb_condition(cluster):
                # Same text the original Python 2 print statement emitted,
                # written so the module also parses on Python 3.
                sys.stdout.write('climb %s\n' % (cluster.id,))
                score = self.__cluster_score(cluster)
                if score >= cluster.max_score:
                    # The cluster itself beats what it inherited from below.
                    cluster.max_score = score
                    cluster.collect = {cluster.id: score}
                if cluster.parent is not None:
                    cluster.parent.ccount += cluster.ecount
                    if cluster.parent.ccount != cluster.parent.ecount or cluster.parent.ccount == cluster.ccount:
                        # Parent still incomplete, or fed by this child only:
                        # it simply inherits the child's result.
                        cluster.parent.collect = cluster.collect
                        cluster.parent.max_score = cluster.max_score
                    else:
                        merge(cluster)
                cluster = cluster.parent
            if cluster is not None:
                top.add(cluster.id)
            else:
                top.add(tree.root.id)
        # Fix: the pole argument was missing here (TypeError on every call).
        rclusters, rscores = self.__aggregate_clusters(tree, top, 1)
        self.__clean_tree(tree, examples)

        return rclusters, rscores

    def __neg_climb(self, tree):
        """Climb from each negative leaf until a blacklisted cluster is met.

        The blacklist holds the ids of every cluster on the parent chain of
        a positive example. Returns (clusters, scores) ordered by cluster id.
        """
        negatives = self.__negatives
        clusters = [tree.leaves[tree.translate(negative)] for negative in negatives]
        clusters = list(set(clusters))
        clusters.sort(key=lambda cluster: cluster.data[0])
        self.__augment_counts(tree, negatives)
        blacklist = self.__create_blacklist(tree, self.__positives)
        # climb consists of going up as far as possible (until you hit a positive)
        top = set()
        for cluster in clusters:
            # Fix: the blacklist stores ids, so compare ids (the original
            # compared cluster objects and always ran off the root).
            while cluster is not None and cluster.id not in blacklist:
                cluster = cluster.parent
            # NOTE(review): the stopping cluster is itself blacklisted; the
            # original code adds it, so that is kept -- confirm whether the
            # last clean cluster below it was intended instead.
            if cluster is not None:
                top.add(cluster.id)
            else:
                # No positive on the way up: fall back to the root, as the
                # positive climb does.
                top.add(tree.root.id)
        rclusters, rscores = self.__aggregate_clusters(tree, top, -1)
        self.__clean_tree(tree, negatives)
        return rclusters, rscores

    def __aggregate_clusters(self, tree, top, pole=1):
        """Turn the ids in *top* into (clusters, scores), ordered by id.

        pole > 0: union of the collect dicts of the top clusters.
        otherwise: the top clusters themselves, scored count * ecount.
        """
        if pole > 0:
            chosen = {}
            for cid in top:
                chosen.update(tree.clusters[cid].collect)
            cids = sorted(chosen.keys())
            clusters = [tree.clusters[cid] for cid in cids]
            scores = [chosen[cid] for cid in cids]
            return clusters, scores
        else:
            # Fix: top holds ids; look the cluster objects up before reading
            # their attributes.
            clusters = [tree.clusters[cid] for cid in sorted(top)]
            scores = [cluster.count*cluster.ecount for cluster in clusters]
            return clusters, scores

    def __aggregate_score(self, scores, pole):
        """Return the mean (pole > 0) or the maximum of *scores*; None when empty."""
        if not scores:
            # Nothing clustered for this pole (e.g. no negatives yet).
            return None
        if pole > 0:
            return sum(scores) / len(scores)
        else:
            return max(scores)

    def __clean_tree(self, tree, examples):
        """Delete the temporary climb attributes from every traced cluster."""
        clusters = [tree.leaves[tree.translate(example)] for example in examples]
        traces = [tree.trace(tree.root, cluster) for cluster in clusters]
        seen = set()
        for trace in traces:
            for cluster in trace:
                if cluster.id not in seen:
                    seen.add(cluster.id)
                    del cluster.ccount; del cluster.ecount
                    del cluster.collect; del cluster.max_score

    def __augment_counts(self, tree, examples):
        """Attach fresh climb attributes to every cluster traced by *examples*.

        ecount becomes the number of example traces passing through the
        cluster; ccount, max_score and collect are reset.
        """
        traces = [tree.trace(tree.root, tree.leaves[tree.translate(example)]) for example in examples]
        counts = defaultdict(int)
        for trace in traces:
            for cluster in trace:
                counts[cluster.id] += 1
        for cid in counts:
            cluster = tree.clusters[cid]
            cluster.ecount = counts[cid]
            cluster.ccount = 0
            cluster.max_score = -9000
            cluster.collect = None

    def __create_blacklist(self, tree, examples):
        """Return the ids of all clusters on the parent chains of *examples*."""
        blacklist = set()
        for example in examples:
            cluster = tree.leaves[tree.translate(example)]
            # Fix: compare ids, so the walk stops at the first cluster that
            # an earlier example already covered.
            while cluster is not None and cluster.id not in blacklist:
                blacklist.add(cluster.id)
                cluster = cluster.parent
        return blacklist

    def __cluster_score(self, cluster):
        """Score a cluster from its range, count and ecount using the weights."""
        weights = self.__weights
        score = (weights[0]*cluster.range + weights[1]*cluster.count) * weights[2] * cluster.ecount
        return score
Exemplo n.º 6
0
class PClassifier:
    """Select clusters of a cluster tree from a growing list of examples.

    Every add/pop re-clusters the tree; the chosen clusters, the items below
    them and an aggregate score are cached and exposed through the accessors.
    The classifier also tracks whether consecutive re-clusterings keep
    producing the same cluster set (see get_constant()).

    The tree must provide: leaves, clusters, root, translate(example),
    trace(root, cluster) and get_children(cluster). Tree clusters must
    provide: id, parent, range and count.
    """

    # Number of consecutive re-clusterings that must yield the same cluster
    # set before get_constant() reports True.
    constantLength = 2

    def __init__(self, data, tree=None):
        """Create an empty classifier.

        data -- passed to ClusterTree(...) when no tree is supplied.
        tree -- optional ready-made tree; stored as-is when it is not None.
        """
        self.__selection = set()

        self.__examples = []

        if tree is not None:
            self.__tree = tree
        else:
            self.__tree = ClusterTree(data)
        self.__clusters = None

        self.__weights = [-1.0, 1.0, 0.667]
        self.__constant, self.__constantCount = False, 0
        # Snapshot taken before each re-clustering; restored by pop_example().
        self.__oldconstant, self.__oldconscount = False, 0
        self.__score = None

    # ------------------------------------------------------------------
    # ACCESSORS
    # ------------------------------------------------------------------
    def get_tree(self):
        """Return the underlying tree (the live object, not a copy)."""
        # TODO: should be copied
        return self.__tree

    def get_selection(self):
        """Return the current selection as a new list."""
        return list(self.__selection)

    def get_clusters(self):
        """Return the current clusters as a new list ([] when there are none)."""
        if self.__clusters is None:
            # Before the first example, after reset(), or after the last
            # example was popped; list(None) used to raise TypeError here.
            return []
        return list(self.__clusters)

    def get_examples(self):
        """Return all recorded examples as a new list."""
        return list(self.__examples)

    def set_weights(self, weights):
        """Replace the scoring weights with a copy of *weights* (three values are read)."""
        self.__weights = list(weights)

    def get_score(self):
        """Return the aggregate score, or None when nothing is clustered."""
        return self.__score

    def get_constant(self):
        """Return True once the cluster set has stayed the same long enough."""
        return self.__constant

    # ------------------------------------------------------------------
    # ACT
    # ------------------------------------------------------------------
    def add_example(self, example):
        """Record *example* and recompute clusters, score and selection."""
        self.__examples.append(example)
        self.__recluster()
        self.__reselect()

    def pop_example(self):
        """Remove the most recent example and recompute the results.

        The constant flag/counter are first rolled back to the snapshot taken
        by the last re-clustering. Raises IndexError when no example exists.
        """
        self.__examples.pop()
        self.__constant = self.__oldconstant
        self.__constantCount = self.__oldconscount
        if len(self.__examples) == 0:
            self.__selection = set()
            self.__clusters = None
            self.__score = None
            return
        self.__recluster()
        self.__reselect()

    def reset(self):
        """Forget all examples and results and restore the default weights."""
        self.__selection = set()
        self.__examples = []

        self.__clusters = None

        self.__weights = [-1.0, 1.0, 0.667]
        # Fix: also clear the constant tracking, otherwise get_constant()
        # keeps reporting the state from before the reset.
        self.__constant, self.__constantCount = False, 0
        self.__oldconstant, self.__oldconscount = False, 0
        self.__score = None

    def __recluster(self):
        """Recompute clusters and score and update the constant tracking."""
        tree = self.__tree
        nclusters, scores = self.__climb(tree)
        # Snapshot for pop_example().
        self.__oldconscount = self.__constantCount
        self.__oldconstant = self.__constant
        if (self.__clusters is not None and set(nclusters) == set(self.__clusters)):
            # Same cluster set as last time: extend the streak.
            # Fix: the streak is no longer reset while it is still below
            # constantLength, which made any threshold above 2 unreachable.
            self.__constantCount += 1
            self.__constant = self.__constantCount >= self.constantLength
        else:
            self.__constant = False
            self.__constantCount = 1
        self.__clusters = nclusters
        self.__score = self.__aggregate_score(scores)

    def __reselect(self):
        """Rebuild the selection from the children of the current clusters."""
        self.__selection = set()
        for cluster in self.__clusters:
            self.__selection.update(self.__tree.get_children(cluster))

    def __climb(self, tree):
        """Climb from the example leaves and collect the best-scoring clusters.

        Returns (clusters, scores), both ordered by cluster id. Temporary
        attributes (ecount, ccount, max_score, collect) are attached to the
        visited tree clusters and removed again before returning.
        """
        import sys  # local import: only needed for the trace line below

        def climb_condition(cluster):
            # A cluster may be processed once every example below it has
            # been accounted for.
            return cluster is not None and cluster.ccount == cluster.ecount

        def merge(cluster):
            # The parent was completed by this child after an earlier child
            # had already reached it: join both collections and weight the
            # two best scores by their example counts.
            parent = cluster.parent
            parent.collect.update(cluster.collect)
            wavg_prev = parent.max_score * (parent.ecount - cluster.ecount)
            wavg_new = cluster.max_score * cluster.ecount
            parent.max_score = (wavg_prev + wavg_new) / parent.ecount
            # The earlier climb registered the parent as its stopping point.
            top.remove(parent.id)

        examples = self.__examples

        clusters = [tree.leaves[tree.translate(example)] for example in examples]
        self.__augment_counts(tree, examples)
        # remove duplicate clusters
        clusters = list(set(clusters))
        for cluster in clusters:
            cluster.ccount = cluster.ecount

        # climb tree and collect best clusters
        top = set()
        for cluster in clusters:
            while climb_condition(cluster):
                # Same text the original Python 2 print statement emitted,
                # written so the module also parses on Python 3.
                sys.stdout.write('climb %s\n' % (cluster.id,))
                score = self.__cluster_score(cluster)
                if score >= cluster.max_score:
                    # The cluster itself beats what it inherited from below.
                    cluster.max_score = score
                    cluster.collect = {cluster.id: score}
                if cluster.parent is not None:
                    cluster.parent.ccount += cluster.ecount
                    if cluster.parent.ccount != cluster.parent.ecount or cluster.parent.ccount == cluster.ccount:
                        # Parent still incomplete, or fed by this child only:
                        # it simply inherits the child's result.
                        cluster.parent.collect = cluster.collect
                        cluster.parent.max_score = cluster.max_score
                    else:
                        merge(cluster)
                cluster = cluster.parent
            if cluster is not None:
                top.add(cluster.id)
            else:
                top.add(tree.root.id)
        rclusters, rscores = self.__aggregate_clusters(tree, top)
        self.__clean_tree(tree, examples)

        return rclusters, rscores

    def __clean_tree(self, tree, examples):
        """Delete the temporary climb attributes from every traced cluster."""
        clusters = [tree.leaves[tree.translate(example)] for example in examples]
        traces = [tree.trace(tree.root, cluster) for cluster in clusters]
        seen = set()
        for trace in traces:
            for cluster in trace:
                if cluster.id not in seen:
                    seen.add(cluster.id)
                    del cluster.ccount; del cluster.ecount
                    del cluster.collect; del cluster.max_score

    def __aggregate_clusters(self, tree, top):
        """Merge the collect dicts of the *top* ids into (clusters, scores) by id."""
        chosen = {}
        for cid in top:
            chosen.update(tree.clusters[cid].collect)
        cids = sorted(chosen.keys())
        clusters = [tree.clusters[cid] for cid in cids]
        scores = [chosen[cid] for cid in cids]
        return clusters, scores

    def __augment_counts(self, tree, examples):
        """Attach fresh climb attributes to every cluster traced by *examples*.

        ecount becomes the number of example traces passing through the
        cluster; ccount, max_score and collect are reset.
        """
        traces = [tree.trace(tree.root, tree.leaves[tree.translate(example)]) for example in examples]
        counts = defaultdict(int)
        for trace in traces:
            for cluster in trace:
                counts[cluster.id] += 1
        for cid in counts:
            cluster = tree.clusters[cid]
            cluster.ecount = counts[cid]
            cluster.ccount = 0
            cluster.max_score = -9000
            cluster.collect = None

    def __cluster_score(self, cluster):
        """Score a cluster from its range, count and ecount using the weights."""
        weights = self.__weights
        score = (weights[0]*cluster.range + weights[1]*cluster.count) * weights[2] * cluster.ecount
        return score

    def __aggregate_score(self, scores):
        """Return the arithmetic mean of *scores* (must be non-empty)."""
        return sum(scores) / len(scores)
Exemplo n.º 7
0
class PNClassifier:
    """Select positive and negative clusters of a cluster tree from examples.

    Examples are indexable pairs: example[0] is the item handed to the tree,
    example[1] is a label (> 0 means positive, anything else negative).
    Every add/pop re-clusters the tree for both poles and caches the chosen
    clusters, the items below them, and one aggregate score per pole.

    The tree must provide: leaves, clusters, root, translate(item),
    trace(root, cluster) and get_children(cluster). Tree clusters must
    provide: id, parent, range, count and data.
    """

    def __init__(self, data, tree=None):
        """Create an empty classifier.

        data -- passed to ClusterTree(...) when no tree is supplied.
        tree -- optional ready-made tree; stored as-is when it is not None.
        """
        self.__pselection = set()
        self.__nselection = set()

        self.__examples = []
        self.__positives = []
        self.__negatives = []

        if tree is None:
            self.__tree = ClusterTree(data)
        else:
            self.__tree = tree
        self.__pclusters = None
        self.__nclusters = None

        self.__weights = [-1.0, 1.0, 0.667]
        # Initialise the per-pole scores so get_pscore()/get_nscore() work
        # before the first example is added (previously AttributeError).
        self.__pscore = None
        self.__nscore = None

    # ------------------------------------------------------------------
    # ACCESSORS
    # ------------------------------------------------------------------

    def get_tree(self):
        """Return the underlying tree (the live object, not a copy)."""
        # TODO: should be copied
        return self.__tree

    def get_pselection(self):
        """Return the positive selection as a new list."""
        return list(self.__pselection)

    def get_nselection(self):
        """Return the negative selection as a new list."""
        return list(self.__nselection)

    def get_pclusters(self):
        """Return the positive clusters as a new list ([] before any clustering)."""
        if self.__pclusters is None:
            return []
        return list(self.__pclusters)

    def get_nclusters(self):
        """Return the negative clusters as a new list ([] before any clustering)."""
        if self.__nclusters is None:
            return []
        return list(self.__nclusters)

    def get_examples(self):
        """Return all recorded examples as a new list."""
        return list(self.__examples)

    def set_weights(self, weights):
        """Replace the scoring weights with a copy of *weights* (three values are read)."""
        self.__weights = list(weights)

    def get_pscore(self):
        """Return the aggregate positive score, or None when there is none."""
        return self.__pscore

    def get_nscore(self):
        """Return the aggregate negative score, or None when there is none."""
        return self.__nscore

    # ------------------------------------------------------------------
    # ACT
    # ------------------------------------------------------------------

    def add_example(self, example):
        """Record *example* (an (item, label) pair) and recompute both poles."""
        self.__examples.append(example)
        if example[1] > 0:
            self.__positives.append(example[0])
        else:
            self.__negatives.append(example[0])
        self.__recluster()
        self.__reselect()

    def pop_example(self):
        """Remove the most recent example and recompute both poles.

        Raises IndexError when no example has been recorded.
        """
        last = self.__examples.pop()
        if last[1] > 0:
            self.__positives.pop()
        else:
            self.__negatives.pop()
        self.__recluster()
        self.__reselect()

    def reset(self):
        """Forget all examples and results and restore the default weights."""
        self.__pselection = set()
        self.__nselection = set()

        self.__examples = []
        self.__positives = []
        self.__negatives = []

        self.__pclusters = None
        self.__nclusters = None

        self.__weights = [-1.0, 1.0, 0.667]
        self.__pscore = None
        self.__nscore = None

    def __recluster(self):
        """Recompute the clusters and the aggregate score of both poles."""
        tree = self.__tree
        self.__pclusters, pscores = self.__climb(tree, 1)
        self.__nclusters, nscores = self.__climb(tree, -1)
        self.__pscore = self.__aggregate_score(pscores, 1)
        self.__nscore = self.__aggregate_score(nscores, -1)

    def __reselect(self):
        """Rebuild both selections from the children of the current clusters."""
        self.__pselection = set()
        for cluster in self.__pclusters:
            self.__pselection.update(self.__tree.get_children(cluster))
        self.__nselection = set()
        for cluster in self.__nclusters:
            self.__nselection.update(self.__tree.get_children(cluster))

    def __climb(self, tree, pole):
        """Dispatch to the positive (pole > 0) or the negative climb."""
        if pole > 0:
            return self.__pos_climb(tree)
        else:
            return self.__neg_climb(tree)

    def __pos_climb(self, tree):
        """Climb from the positive leaves and collect the best-scoring clusters.

        Returns (clusters, scores), both ordered by cluster id. Temporary
        attributes (ecount, ccount, max_score, collect) are attached to the
        visited tree clusters and removed again before returning.
        """
        import sys  # local import: only needed for the trace line below

        def climb_condition(cluster):
            # A cluster may be processed once every example below it has
            # been accounted for.
            return cluster is not None and cluster.ccount == cluster.ecount

        def merge(cluster):
            # The parent was completed by this child after an earlier child
            # had already reached it: join both collections and weight the
            # two best scores by their example counts.
            parent = cluster.parent
            parent.collect.update(cluster.collect)
            wavg_prev = parent.max_score * (parent.ecount - cluster.ecount)
            wavg_new = cluster.max_score * cluster.ecount
            parent.max_score = (wavg_prev + wavg_new) / parent.ecount
            # The earlier climb registered the parent as its stopping point.
            top.remove(parent.id)

        # Fix: translate() is given bare items, as in the negative climb;
        # the original passed the (item, label) pairs from self.__examples.
        # NOTE(review): confirm that only positives should drive this climb.
        examples = self.__positives

        clusters = [
            tree.leaves[tree.translate(example)] for example in examples
        ]
        self.__augment_counts(tree, examples)
        # remove duplicate clusters and sort
        clusters = list(set(clusters))
        clusters.sort(key=lambda cluster: cluster.data[0])
        for cluster in clusters:
            cluster.ccount = cluster.ecount

        # climb tree and collect best clusters
        top = set()
        for cluster in clusters:
            while climb_condition(cluster):
                # Same text the original Python 2 print statement emitted,
                # written so the module also parses on Python 3.
                sys.stdout.write('climb %s\n' % (cluster.id,))
                score = self.__cluster_score(cluster)
                if score >= cluster.max_score:
                    # The cluster itself beats what it inherited from below.
                    cluster.max_score = score
                    cluster.collect = {cluster.id: score}
                if cluster.parent is not None:
                    cluster.parent.ccount += cluster.ecount
                    if cluster.parent.ccount != cluster.parent.ecount or cluster.parent.ccount == cluster.ccount:
                        # Parent still incomplete, or fed by this child only:
                        # it simply inherits the child's result.
                        cluster.parent.collect = cluster.collect
                        cluster.parent.max_score = cluster.max_score
                    else:
                        merge(cluster)
                cluster = cluster.parent
            if cluster is not None:
                top.add(cluster.id)
            else:
                top.add(tree.root.id)
        # Fix: the pole argument was missing here (TypeError on every call).
        rclusters, rscores = self.__aggregate_clusters(tree, top, 1)
        self.__clean_tree(tree, examples)

        return rclusters, rscores

    def __neg_climb(self, tree):
        """Climb from each negative leaf until a blacklisted cluster is met.

        The blacklist holds the ids of every cluster on the parent chain of
        a positive example. Returns (clusters, scores) ordered by cluster id.
        """
        negatives = self.__negatives
        clusters = [
            tree.leaves[tree.translate(negative)] for negative in negatives
        ]
        clusters = list(set(clusters))
        clusters.sort(key=lambda cluster: cluster.data[0])
        self.__augment_counts(tree, negatives)
        blacklist = self.__create_blacklist(tree, self.__positives)
        # climb consists of going up as far as possible (until you hit a positive)
        top = set()
        for cluster in clusters:
            # Fix: the blacklist stores ids, so compare ids (the original
            # compared cluster objects and always ran off the root).
            while cluster is not None and cluster.id not in blacklist:
                cluster = cluster.parent
            # NOTE(review): the stopping cluster is itself blacklisted; the
            # original code adds it, so that is kept -- confirm whether the
            # last clean cluster below it was intended instead.
            if cluster is not None:
                top.add(cluster.id)
            else:
                # No positive on the way up: fall back to the root, as the
                # positive climb does.
                top.add(tree.root.id)
        rclusters, rscores = self.__aggregate_clusters(tree, top, -1)
        self.__clean_tree(tree, negatives)
        return rclusters, rscores

    def __aggregate_clusters(self, tree, top, pole=1):
        """Turn the ids in *top* into (clusters, scores), ordered by id.

        pole > 0: union of the collect dicts of the top clusters.
        otherwise: the top clusters themselves, scored count * ecount.
        """
        if pole > 0:
            chosen = {}
            for cid in top:
                chosen.update(tree.clusters[cid].collect)
            cids = sorted(chosen.keys())
            clusters = [tree.clusters[cid] for cid in cids]
            scores = [chosen[cid] for cid in cids]
            return clusters, scores
        else:
            # Fix: top holds ids; look the cluster objects up before reading
            # their attributes.
            clusters = [tree.clusters[cid] for cid in sorted(top)]
            scores = [cluster.count * cluster.ecount for cluster in clusters]
            return clusters, scores

    def __aggregate_score(self, scores, pole):
        """Return the mean (pole > 0) or the maximum of *scores*; None when empty."""
        if not scores:
            # Nothing clustered for this pole (e.g. no negatives yet).
            return None
        if pole > 0:
            return sum(scores) / len(scores)
        else:
            return max(scores)

    def __clean_tree(self, tree, examples):
        """Delete the temporary climb attributes from every traced cluster."""
        clusters = [
            tree.leaves[tree.translate(example)] for example in examples
        ]
        traces = [tree.trace(tree.root, cluster) for cluster in clusters]
        seen = set()
        for trace in traces:
            for cluster in trace:
                if cluster.id not in seen:
                    seen.add(cluster.id)
                    del cluster.ccount
                    del cluster.ecount
                    del cluster.collect
                    del cluster.max_score

    def __augment_counts(self, tree, examples):
        """Attach fresh climb attributes to every cluster traced by *examples*.

        ecount becomes the number of example traces passing through the
        cluster; ccount, max_score and collect are reset.
        """
        traces = [
            tree.trace(tree.root, tree.leaves[tree.translate(example)])
            for example in examples
        ]
        counts = defaultdict(int)
        for trace in traces:
            for cluster in trace:
                counts[cluster.id] += 1
        for cid in counts:
            cluster = tree.clusters[cid]
            cluster.ecount = counts[cid]
            cluster.ccount = 0
            cluster.max_score = -9000
            cluster.collect = None

    def __create_blacklist(self, tree, examples):
        """Return the ids of all clusters on the parent chains of *examples*."""
        blacklist = set()
        for example in examples:
            cluster = tree.leaves[tree.translate(example)]
            # Fix: compare ids, so the walk stops at the first cluster that
            # an earlier example already covered.
            while cluster is not None and cluster.id not in blacklist:
                blacklist.add(cluster.id)
                cluster = cluster.parent
        return blacklist

    def __cluster_score(self, cluster):
        """Score a cluster from its range, count and ecount using the weights."""
        weights = self.__weights
        score = (weights[0] * cluster.range +
                 weights[1] * cluster.count) * weights[2] * cluster.ecount
        return score
Exemplo n.º 8
0
class PClassifier:
    """Select clusters of a cluster tree from a growing list of examples.

    Every add/pop re-clusters the tree; the chosen clusters, the items below
    them and an aggregate score are cached and exposed through the accessors.
    The classifier also tracks whether consecutive re-clusterings keep
    producing the same cluster set (see get_constant()).

    The tree must provide: leaves, clusters, root, translate(example),
    trace(root, cluster) and get_children(cluster). Tree clusters must
    provide: id, parent, range and count.
    """

    # Number of consecutive re-clusterings that must yield the same cluster
    # set before get_constant() reports True.
    constantLength = 2

    def __init__(self, data, tree=None):
        """Create an empty classifier.

        data -- passed to ClusterTree(...) when no tree is supplied.
        tree -- optional ready-made tree; stored as-is when it is not None.
        """
        self.__selection = set()

        self.__examples = []

        if tree is not None:
            self.__tree = tree
        else:
            self.__tree = ClusterTree(data)
        self.__clusters = None

        self.__weights = [-1.0, 1.0, 0.667]
        self.__constant, self.__constantCount = False, 0
        # Snapshot taken before each re-clustering; restored by pop_example().
        self.__oldconstant, self.__oldconscount = False, 0
        self.__score = None

    # ------------------------------------------------------------------
    # ACCESSORS
    # ------------------------------------------------------------------

    def get_tree(self):
        """Return the underlying tree (the live object, not a copy)."""
        # TODO: should be copied
        return self.__tree

    def get_selection(self):
        """Return the current selection as a new list."""
        return list(self.__selection)

    def get_clusters(self):
        """Return the current clusters as a new list ([] when there are none)."""
        if self.__clusters is None:
            # Before the first example, after reset(), or after the last
            # example was popped; list(None) used to raise TypeError here.
            return []
        return list(self.__clusters)

    def get_examples(self):
        """Return all recorded examples as a new list."""
        return list(self.__examples)

    def set_weights(self, weights):
        """Replace the scoring weights with a copy of *weights* (three values are read)."""
        self.__weights = list(weights)

    def get_score(self):
        """Return the aggregate score, or None when nothing is clustered."""
        return self.__score

    def get_constant(self):
        """Return True once the cluster set has stayed the same long enough."""
        return self.__constant

    # ------------------------------------------------------------------
    # ACT
    # ------------------------------------------------------------------

    def add_example(self, example):
        """Record *example* and recompute clusters, score and selection."""
        self.__examples.append(example)
        self.__recluster()
        self.__reselect()

    def pop_example(self):
        """Remove the most recent example and recompute the results.

        The constant flag/counter are first rolled back to the snapshot taken
        by the last re-clustering. Raises IndexError when no example exists.
        """
        self.__examples.pop()
        self.__constant = self.__oldconstant
        self.__constantCount = self.__oldconscount
        if len(self.__examples) == 0:
            self.__selection = set()
            self.__clusters = None
            self.__score = None
            return
        self.__recluster()
        self.__reselect()

    def reset(self):
        """Forget all examples and results and restore the default weights."""
        self.__selection = set()
        self.__examples = []

        self.__clusters = None

        self.__weights = [-1.0, 1.0, 0.667]
        # Fix: also clear the constant tracking, otherwise get_constant()
        # keeps reporting the state from before the reset.
        self.__constant, self.__constantCount = False, 0
        self.__oldconstant, self.__oldconscount = False, 0
        self.__score = None

    def __recluster(self):
        """Recompute clusters and score and update the constant tracking."""
        tree = self.__tree
        nclusters, scores = self.__climb(tree)
        # Snapshot for pop_example().
        self.__oldconscount = self.__constantCount
        self.__oldconstant = self.__constant
        if (self.__clusters is not None
                and set(nclusters) == set(self.__clusters)):
            # Same cluster set as last time: extend the streak.
            # Fix: the streak is no longer reset while it is still below
            # constantLength, which made any threshold above 2 unreachable.
            self.__constantCount += 1
            self.__constant = self.__constantCount >= self.constantLength
        else:
            self.__constant = False
            self.__constantCount = 1
        self.__clusters = nclusters
        self.__score = self.__aggregate_score(scores)

    def __reselect(self):
        """Rebuild the selection from the children of the current clusters."""
        self.__selection = set()
        for cluster in self.__clusters:
            self.__selection.update(self.__tree.get_children(cluster))

    def __climb(self, tree):
        """Climb from the example leaves and collect the best-scoring clusters.

        Returns (clusters, scores), both ordered by cluster id. Temporary
        attributes (ecount, ccount, max_score, collect) are attached to the
        visited tree clusters and removed again before returning.
        """
        import sys  # local import: only needed for the trace line below

        def climb_condition(cluster):
            # A cluster may be processed once every example below it has
            # been accounted for.
            return cluster is not None and cluster.ccount == cluster.ecount

        def merge(cluster):
            # The parent was completed by this child after an earlier child
            # had already reached it: join both collections and weight the
            # two best scores by their example counts.
            parent = cluster.parent
            parent.collect.update(cluster.collect)
            wavg_prev = parent.max_score * (parent.ecount - cluster.ecount)
            wavg_new = cluster.max_score * cluster.ecount
            parent.max_score = (wavg_prev + wavg_new) / parent.ecount
            # The earlier climb registered the parent as its stopping point.
            top.remove(parent.id)

        examples = self.__examples

        clusters = [
            tree.leaves[tree.translate(example)] for example in examples
        ]
        self.__augment_counts(tree, examples)
        # remove duplicate clusters
        clusters = list(set(clusters))
        for cluster in clusters:
            cluster.ccount = cluster.ecount

        # climb tree and collect best clusters
        top = set()
        for cluster in clusters:
            while climb_condition(cluster):
                # Same text the original Python 2 print statement emitted,
                # written so the module also parses on Python 3.
                sys.stdout.write('climb %s\n' % (cluster.id,))
                score = self.__cluster_score(cluster)
                if score >= cluster.max_score:
                    # The cluster itself beats what it inherited from below.
                    cluster.max_score = score
                    cluster.collect = {cluster.id: score}
                if cluster.parent is not None:
                    cluster.parent.ccount += cluster.ecount
                    if cluster.parent.ccount != cluster.parent.ecount or cluster.parent.ccount == cluster.ccount:
                        # Parent still incomplete, or fed by this child only:
                        # it simply inherits the child's result.
                        cluster.parent.collect = cluster.collect
                        cluster.parent.max_score = cluster.max_score
                    else:
                        merge(cluster)
                cluster = cluster.parent
            if cluster is not None:
                top.add(cluster.id)
            else:
                top.add(tree.root.id)
        rclusters, rscores = self.__aggregate_clusters(tree, top)
        self.__clean_tree(tree, examples)

        return rclusters, rscores

    def __clean_tree(self, tree, examples):
        """Delete the temporary climb attributes from every traced cluster."""
        clusters = [
            tree.leaves[tree.translate(example)] for example in examples
        ]
        traces = [tree.trace(tree.root, cluster) for cluster in clusters]
        seen = set()
        for trace in traces:
            for cluster in trace:
                if cluster.id not in seen:
                    seen.add(cluster.id)
                    del cluster.ccount
                    del cluster.ecount
                    del cluster.collect
                    del cluster.max_score

    def __aggregate_clusters(self, tree, top):
        """Merge the collect dicts of the *top* ids into (clusters, scores) by id."""
        chosen = {}
        for cid in top:
            chosen.update(tree.clusters[cid].collect)
        cids = sorted(chosen.keys())
        clusters = [tree.clusters[cid] for cid in cids]
        scores = [chosen[cid] for cid in cids]
        return clusters, scores

    def __augment_counts(self, tree, examples):
        """Attach fresh climb attributes to every cluster traced by *examples*.

        ecount becomes the number of example traces passing through the
        cluster; ccount, max_score and collect are reset.
        """
        traces = [
            tree.trace(tree.root, tree.leaves[tree.translate(example)])
            for example in examples
        ]
        counts = defaultdict(int)
        for trace in traces:
            for cluster in trace:
                counts[cluster.id] += 1
        for cid in counts:
            cluster = tree.clusters[cid]
            cluster.ecount = counts[cid]
            cluster.ccount = 0
            cluster.max_score = -9000
            cluster.collect = None

    def __cluster_score(self, cluster):
        """Score a cluster from its range, count and ecount using the weights."""
        weights = self.__weights
        score = (weights[0] * cluster.range +
                 weights[1] * cluster.count) * weights[2] * cluster.ecount
        return score

    def __aggregate_score(self, scores):
        """Return the arithmetic mean of *scores* (must be non-empty)."""
        return sum(scores) / len(scores)