def initialize_nodes(self):
    """Reset ``self.nodes`` to a single root whose children are the microclasses.

    One child Node is built per key of ``self.microclasses``; its size is the
    length of the corresponding entry. The root is labelled with every
    microclass key, its size is the sum of the children sizes, and it is
    stored under the frozenset of all microclass keys.
    """
    microclasses = self.microclasses

    children = []
    total_size = 0
    for label in microclasses:
        members = len(microclasses[label])
        total_size += members
        children.append(
            Node([label], size=members, macroclass=False, color="c"))

    root = Node(list(microclasses),
                children=children,
                size=total_size,
                color="r",
                macroclass=False)
    self.nodes = {frozenset(microclasses): root}
def make_nodes(concepts, prb):
    """Build one Node per concept and return them keyed by concept extent.

    Parameters:
        concepts (iterable): objects exposing ``extent``, ``intent``,
            ``properties`` and ``objects`` attributes.
        prb: progress reporter; ``prb.update(1)`` is called once per concept.

    Returns:
        dict: maps each concept's extent to the Node built for it.
    """
    # NOTE(review): `self` is not a parameter here; it is presumably captured
    # from an enclosing method's scope -- confirm against the caller.
    nodes = {}
    for concept in concepts:
        extent, intent = concept.extent, concept.intent
        properties, objects = concept.properties, concept.objects

        # Only labels of the extent that are known leaves contribute to size.
        size = 0
        for label in extent:
            if label in self.leaves:
                size += len(self.leaves[label])

        nodes[extent] = Node(extent,
                             intent=intent,
                             size=size,
                             common=properties,
                             objects=objects,
                             macroclass=False)
        prb.update(1)
    return nodes
def merge(self, a, b):
    """Merge two clusters, build a Node to represent the result, update the DL.

    The result of ``self._simulate_merge(a, b)`` is committed to ``self.R``,
    ``self.C``, ``self.P``, ``self.patterns`` and ``self.clusters[a | b]``.
    The entries for ``a`` and ``b`` are removed from ``self.nodes`` but are
    not removed from ``self.clusters`` here.

    Parameters:
        a: the label of a cluster to merge. It is combined with ``b`` through
            ``a | b`` and used as a key of ``self.nodes``
            (presumably a frozenset of str -- confirm against callers).
        b: the label of the other cluster to merge, same kind as ``a``.
    """
    merged = a | b
    (self.R, self.C, self.P, self.patterns,
     self.clusters[merged]) = self._simulate_merge(a, b)

    previous_dl = self.DL
    self.DL = self.R + self.C + self.P + self.M

    left = self.nodes.pop(a)
    right = self.nodes.pop(b)
    leaves = list(merged)
    size = left.attributes["size"] + right.attributes["size"]

    # A merge which does not lower the DL gets another color and is not
    # flagged as a macroclass.
    if self.DL >= previous_dl:
        self.printv(
            "\nDL stopped improving: prev = {}, current best = {}".format(
                previous_dl, self.DL))
        color = "r"
    else:
        color = "c"

    self.nodes[merged] = Node(leaves,
                              size=size,
                              children=[left, right],
                              DL=self.DL,
                              color=color,
                              macroclass=color != "r")

    self.printv("\nMerging ", ", ".join(a), " and ", ", ".join(b),
                "with DL ", self.DL)

    # Log the current partition followed by the components of the DL.
    current_partition = " - ".join(
        [", ".join(self.nodes[key].labels) for key in self.nodes])
    measures = "\t".join(
        str(value)
        for value in (self.M, self.C, self.P, self.R, self.DL))
    self.log(" ".join([current_partition, ":\t", measures, "\n"]),
             name="clusters")
def __init__(self, microclasses, *args, **kwargs):
    """Store the microclasses and create one leaf Node per microclass.

    Parameters:
        microclasses (dict): maps each microclass label to a sized
            collection; the length of that collection is the leaf's size.
        *args: accepted and ignored by this initializer.
        **kwargs: preferences, kept as ``self.preferences``. The keys read
            here are:
            verbose: when missing or falsy, ``self.printv`` is silenced.
            debug: when truthy and ``prefix`` is set, logging stays active.
            prefix: start of the log file name; ``"_{}.log"`` is appended
                and the result is stored under ``"filename"``.
    """
    # self.preferences is the kwargs dict itself, so "filename" below is
    # added to that same dict.
    self.preferences = kwargs
    self.microclasses = microclasses
    self.nodes = {
        frozenset([m]): Node([m],
                             size=len(self.microclasses[m]),
                             macroclass=False)
        for m in self.microclasses
    }

    if not kwargs.get("verbose"):
        self.printv = _do_nothing

    # A missing prefix is handled like an empty one (logging disabled)
    # instead of raising KeyError when debug is requested without a prefix.
    if kwargs.get("debug") and kwargs.get("prefix"):
        self.preferences["filename"] = self.preferences["prefix"] + "_{}.log"
        print("Writing logs to : ",
              self.preferences["filename"].format("<...>"))
    else:
        self.log = _do_nothing
def make_nodes(concepts, prb):
    """Build one Node per concept, with optional extra annotations.

    Parameters:
        concepts (iterable): objects exposing ``extent``, ``intent``,
            ``properties`` and ``objects`` attributes. If a concept has an
            ``_extra_qumin_annotation`` attribute, its items are passed to
            Node as additional keyword arguments.
        prb: progress reporter; ``prb.update(1)`` is called once per concept.

    Returns:
        dict: maps each concept's extent to the Node built for it.
    """
    # NOTE(review): `self` is not a parameter here; it is presumably captured
    # from an enclosing method's scope -- confirm against the caller.
    nodes = {}
    for concept in concepts:
        extent, intent = concept.extent, concept.intent
        properties, objects = concept.properties, concept.objects

        # Only labels of the extent that are known leaves contribute to size.
        size = 0
        for label in extent:
            if label in self.leaves:
                size += len(self.leaves[label])

        extra = getattr(concept, '_extra_qumin_annotation', {})
        nodes[extent] = Node(extent,
                             intent=intent,
                             size=size,
                             common=properties,
                             objects=objects,
                             macroclass=False,
                             **extra)
        prb.update(1)
    return nodes
def main(dataset_fn, output_fn, clusters_no, w):
    """Cluster the rows of a CSV file with KMeans and save the result.

    Parameters:
        dataset_fn: CSV file read with ``pd.read_csv``; the columns ``X``,
            ``Y``, ``PreChange`` and ``ID`` are used.
        output_fn: destination handed to ``model.save``.
        clusters_no: number of clusters handed to ``KMeans``.
        w: converted to a numpy array and handed to ``KMeans``
            (presumably per-feature weights -- confirm against KMeans).
    """
    df = pd.read_csv(dataset_fn)

    # One Node per row: the three numeric features, then the row ID.
    geo_locs = [
        Node([float(row['X']),
              float(row['Y']),
              float(row['PreChange'])], row['ID'])
        for _, row in df.iterrows()
    ]

    model = KMeans(geo_locs, clusters_no, np.array(w))
    if model.fit(True) == -1:
        print("No of points are less than cluster number!")
        return

    # Persist the clustering, then display it.
    model.save(output_fn)
    model.showresult(True)
def merge(self, a, b):
    """Merge two clusters, build a Node for the result, update the distances.

    Parameters:
        a (frozenset): the label of a cluster to merge.
        b (frozenset): the label of a cluster to merge.
    """
    merged = a | b
    self.printv("\nMerging ", list(a), list(b), "with d ",
                self.distances[a][b])
    self.update_distances(merged)

    # Build the tree node on top of the two merged nodes.
    left, right = self.nodes.pop(a), self.nodes.pop(b)
    labels = left.labels + right.labels
    total_size = left.attributes["size"] + right.attributes["size"]

    # NOTE(review): the distance is read again after update_distances(), as
    # in the original code; confirm whether the value from before the update
    # is the one intended for the node.
    dist = self.distances[a][b]
    self.nodes[merged] = Node(labels,
                              size=total_size,
                              children=[left, right],
                              dist=dist,
                              color="r",
                              macroclass=False)
def split_leaves(self):
    """Split a cluster by replacing it with the two clusters left and right.

    Recompute the description length when left and right are separated.
    Build two nodes corresponding to left and right, children of to_split.
    Nothing is changed when either left or right has no labels.
    """
    candidates = self.to_split.children
    if len(self.left.labels) == 0 or len(self.right.labels) == 0:
        return

    left_labels = self.left.labels
    right_labels = self.right.labels

    # A leaf goes to the left when its first label belongs to the left
    # cluster, to the right otherwise.
    left_leaves, right_leaves = [], []
    for leaf in candidates:
        side = left_leaves if leaf.labels[0] in left_labels else right_leaves
        side.append(leaf)

    # Register both halves as clusters and recompute the DL.
    total = self.size
    self.right.totalsize = total
    self.left.totalsize = total
    self.right.C = weighted_log(self.right.size, self.size)
    self.left.C = weighted_log(self.left.size, self.size)
    self.clusters[frozenset(right_labels)] = self.right
    self.clusters[frozenset(left_labels)] = self.left
    self.compute_DL()

    # Log the current partition followed by the components of the DL.
    current_partition = " - ".join(", ".join(c) for c in self.nodes)
    measures = "\t".join(
        str(value)
        for value in (self.M, self.C, self.P, self.R, self.DL))
    self.log(" ".join([current_partition, ":\t", measures, "\n"]),
             name="clusters")

    # Keep track of the lowest DL seen so far; the color records whether
    # this split reached a new minimum.
    if self.DL >= self.minDL:
        color = "c"
    else:
        color = "r"
        self.minDL = self.DL

    kwargs = {"macroclass": False, "DL": self.DL, "color": color}

    def _as_node(labels, members):
        # Several leaves get a new parent Node; a single leaf is reused
        # and only has its DL attribute updated.
        if len(members) > 1:
            return Node(labels,
                        size=sum(m.attributes["size"] for m in members),
                        children=members,
                        **kwargs)
        single = members[0]
        single.attributes["DL"] = self.DL
        return single

    left = _as_node(left_labels, left_leaves)
    right = _as_node(right_labels, right_leaves)

    self.printv("Splitted:", ", ".join(right.labels), "\n\t",
                ", ".join(left.labels))
    self.to_split.children = [left, right]