Exemplo n.º 1
0
 def initialize_nodes(self):
     """Reset ``self.nodes`` to a single root whose children are the microclasses.

     One child ``Node`` is created per key of ``self.microclasses``; its size
     is the length of the value stored under that key. The root carries the
     list of all keys as labels and the sum of all those lengths as its size,
     and is registered under the frozenset of all keys.
     """
     # One "c"-coloured child per microclass.
     leaf_nodes = [
         Node([name],
              size=len(self.microclasses[name]),
              macroclass=False,
              color="c")
         for name in self.microclasses
     ]
     total_size = sum(
         len(self.microclasses[name]) for name in self.microclasses)

     # The root is the only "r"-coloured node and the only dictionary entry.
     root = Node(list(self.microclasses),
                 children=leaf_nodes,
                 size=total_size,
                 color="r",
                 macroclass=False)
     self.nodes = {frozenset(self.microclasses): root}
Exemplo n.º 2
0
 def make_nodes(concepts, prb):
     """Build one ``Node`` per concept and return them keyed by extent.

     NOTE(review): ``self`` is not a parameter; it is resolved from an
     enclosing scope that is not visible here.

     Parameters:
         concepts: iterable of objects exposing ``extent``, ``intent``,
             ``properties`` and ``objects``.
         prb: progress reporter; ``prb.update(1)`` is called once per concept.

     Returns:
         dict: maps each concept's extent to the ``Node`` built for it.
     """
     by_extent = {}
     for item in concepts:
         ext = item.extent
         # Only labels of the extent that are known leaves contribute to size.
         leaf_sizes = [
             len(self.leaves[label]) for label in ext
             if label in self.leaves
         ]
         by_extent[ext] = Node(ext,
                               intent=item.intent,
                               size=sum(leaf_sizes),
                               common=item.properties,
                               objects=item.objects,
                               macroclass=False)
         prb.update(1)
     return by_extent
Exemplo n.º 3
0
    def merge(self, a, b):
        """Merge clusters ``a`` and ``b``, record the result as a Node, update the DL.

        Parameters:
            a: label of the first cluster; a key of ``self.nodes`` that
                supports ``|`` and iterates over strings.
            b: label of the second cluster, same requirements as ``a``.

        The merged label ``a | b`` becomes a new key in ``self.clusters`` and
        ``self.nodes``. The entries for ``a`` and ``b`` are popped from
        ``self.nodes`` but are not removed from ``self.clusters`` here.
        """
        merged = a | b
        r, c, p, patterns, cluster = self._simulate_merge(a, b)
        self.R = r
        self.C = c
        self.P = p
        self.patterns = patterns
        self.clusters[merged] = cluster

        previous = self.DL
        self.DL = self.R + self.C + self.P + self.M

        left_node = self.nodes.pop(a)
        right_node = self.nodes.pop(b)
        merged_size = (left_node.attributes["size"] +
                       right_node.attributes["size"])

        # A merge that does not lower the DL is flagged "r" and is not a macroclass.
        not_improving = self.DL >= previous
        if not_improving:
            self.printv(
                "\nDL stopped improving: prev = {}, current best = {}".format(
                    previous, self.DL))

        self.nodes[merged] = Node(list(merged),
                                  size=merged_size,
                                  children=[left_node, right_node],
                                  DL=self.DL,
                                  color="r" if not_improving else "c",
                                  macroclass=not not_improving)

        self.printv("\nMerging ", ", ".join(a), " and ", ", ".join(b),
                    "with DL ", self.DL)

        # Log the current partition followed by the description-length terms.
        partition = " - ".join(
            ", ".join(node.labels) for node in self.nodes.values())
        terms = "\t".join(
            str(value)
            for value in (self.M, self.C, self.P, self.R, self.DL))
        self.log(" ".join([partition, ":\t", terms, "\n"]), name="clusters")
Exemplo n.º 4
0
    def __init__(self, microclasses, *args, **kwargs):
        """Store the microclasses and create one leaf Node per microclass.

        Parameters:
            microclasses: container iterated for its labels and indexed by
                label; the length of each indexed value is the leaf's size.
            *args: accepted and ignored.
            **kwargs: stored as ``self.preferences``. Keys read here:
                ``verbose`` (when missing or falsy, ``self.printv`` is
                replaced by a no-op), ``debug`` and ``prefix`` (logging stays
                enabled only when both are truthy; otherwise ``self.log`` is
                replaced by a no-op).
        """
        self.preferences = kwargs
        self.microclasses = microclasses
        self.nodes = {
            frozenset([m]): Node([m],
                                 size=len(self.microclasses[m]),
                                 macroclass=False)
            for m in self.microclasses
        }

        if not kwargs.get("verbose"):
            self.printv = _do_nothing

        # Use .get() for "prefix" too: passing debug=True without a prefix
        # used to raise KeyError; it now simply disables logging, the same
        # as an empty prefix does.
        if kwargs.get("debug") and kwargs.get("prefix"):
            self.preferences[
                "filename"] = self.preferences["prefix"] + "_{}.log"
            print("Writing logs to : ",
                  self.preferences["filename"].format("<...>"))
        else:
            self.log = _do_nothing
Exemplo n.º 5
0
 def make_nodes(concepts, prb):
     """Build one ``Node`` per concept, with optional extra annotations.

     NOTE(review): ``self`` is not a parameter; it is resolved from an
     enclosing scope that is not visible here.

     Parameters:
         concepts: iterable of objects exposing ``extent``, ``intent``,
             ``properties`` and ``objects``. If a concept has a
             ``_extra_qumin_annotation`` attribute, its items are passed to
             ``Node`` as additional keyword arguments.
         prb: progress reporter; ``prb.update(1)`` is called once per concept.

     Returns:
         dict: maps each concept's extent to the ``Node`` built for it.
     """
     by_extent = {}
     for item in concepts:
         ext = item.extent
         node_kwargs = {
             "intent": item.intent,
             # Only labels of the extent that are known leaves count.
             "size": sum(
                 len(self.leaves[label]) for label in ext
                 if label in self.leaves),
             "common": item.properties,
             "objects": item.objects,
             "macroclass": False,
         }
         extra = getattr(item, '_extra_qumin_annotation', {})
         # Both dicts are unpacked separately so that a key clash still
         # raises TypeError rather than silently overriding a value.
         by_extent[ext] = Node(ext, **node_kwargs, **extra)
         prb.update(1)
     return by_extent
Exemplo n.º 6
0
def main(dataset_fn, output_fn, clusters_no, w):
    """Cluster the rows of a CSV file with ``KMeans`` and save the result.

    Parameters:
        dataset_fn: path of the CSV to read; columns ``X``, ``Y``,
            ``PreChange`` and ``ID`` are used.
        output_fn: destination handed to ``model.save``.
        clusters_no: number of clusters handed to ``KMeans``.
        w: weights, converted to a numpy array before being handed to
            ``KMeans``.
    """
    frame = pd.read_csv(dataset_fn)

    # Each row becomes a Node holding [X, Y, PreChange] as floats plus its ID.
    points = [
        Node([float(rec['X']),
              float(rec['Y']),
              float(rec['PreChange'])], rec['ID'])
        for _, rec in frame.iterrows()
    ]

    weights = np.array(w)
    model = KMeans(points, clusters_no, weights)

    # fit() signals "fewer points than clusters" by returning -1.
    if model.fit(True) == -1:
        print("No of points are less than cluster number!")
        return

    model.save(output_fn)
    model.showresult(True)
Exemplo n.º 7
0
    def merge(self, a, b):
        """Merge clusters ``a`` and ``b`` into one Node and refresh the distances.

        Parameters:
            a (frozenset): the label of the first cluster to merge.
            b (frozenset): the label of the second cluster to merge.

        The nodes stored under ``a`` and ``b`` are popped from ``self.nodes``
        and become the children of a new node stored under ``a | b``.
        """
        merged = a | b

        self.printv("\nMerging ", list(a), list(b), "with d ",
                    self.distances[a][b])

        self.update_distances(merged)

        # Build the subtree out of the two nodes being replaced.
        left_node = self.nodes.pop(a)
        right_node = self.nodes.pop(b)
        self.nodes[merged] = Node(
            left_node.labels + right_node.labels,
            size=left_node.attributes["size"] + right_node.attributes["size"],
            children=[left_node, right_node],
            # Looked up again after update_distances, as the stored value
            # is the one attached to the new node.
            dist=self.distances[a][b],
            color="r",
            macroclass=False)
Exemplo n.º 8
0
    def split_leaves(self):
        """Replace the cluster being split with its ``left`` and ``right`` parts.

        Nothing happens when either part has no labels. Otherwise both parts
        are registered in ``self.clusters``, the description length is
        recomputed and logged, and the children of ``self.to_split`` are
        replaced by two nodes, one per part.

        The entry for the cluster being split is not removed from
        ``self.clusters`` here.
        """
        candidates = self.to_split.children

        # An empty side means there is nothing to separate.
        if len(self.left.labels) == 0 or len(self.right.labels) == 0:
            return

        left_labels = self.left.labels
        right_labels = self.right.labels

        # A child goes left when its first label belongs to the left part.
        left_leaves, right_leaves = [], []
        for leaf in candidates:
            if leaf.labels[0] in left_labels:
                bucket = left_leaves
            else:
                bucket = right_leaves
            bucket.append(leaf)

        self.right.totalsize = self.left.totalsize = self.size
        self.right.C = weighted_log(self.right.size, self.size)
        self.left.C = weighted_log(self.left.size, self.size)
        self.clusters[frozenset(right_labels)] = self.right
        self.clusters[frozenset(left_labels)] = self.left

        self.compute_DL()

        # Log the current partition followed by the description-length terms.
        partition = " - ".join(", ".join(key) for key in self.nodes)
        terms = "\t".join(
            str(value)
            for value in (self.M, self.C, self.P, self.R, self.DL))
        self.log(" ".join([partition, ":\t", terms, "\n"]), name="clusters")

        # "c" when the DL did not go below the best seen so far; otherwise
        # "r", and the best DL is updated.
        if self.DL >= self.minDL:
            color = "c"
        else:
            color = "r"
            self.minDL = self.DL
        shared = {"macroclass": False, "DL": self.DL, "color": color}

        def build_side(labels, members):
            # Several members get a fresh parent node; a single member is
            # reused as-is and only has its DL attribute refreshed.
            if len(members) > 1:
                return Node(labels,
                            size=sum(member.attributes["size"]
                                     for member in members),
                            children=members,
                            **shared)
            single = members[0]
            single.attributes["DL"] = self.DL
            return single

        left = build_side(left_labels, left_leaves)
        right = build_side(right_labels, right_leaves)

        self.printv("Splitted:", ", ".join(right.labels), "\n\t",
                    ", ".join(left.labels))
        self.to_split.children = [left, right]