def test_tpr(self):
    """Cross-check TPR-tree query results against a brute-force scan.

    Replays a stream of INSERT/DELETE/QUERY events, mirroring the tree's
    contents in a plain dict, and asserts that every query returns exactly
    the ids a brute-force intersection test produces.
    """
    # TODO : this freezes forever on some windows cloud builds
    if os.name == 'nt':
        return

    # id -> moving object, maintained in lockstep with the TPR-tree
    live_objects = dict()
    tpr_tree = Index(properties=Property(type=RT_TPRTree))

    for operation, t_now, object_ in data_generator():
        if operation == "INSERT":
            tpr_tree.insert(object_.id, object_.get_coordinates())
            live_objects[object_.id] = object_
        elif operation == "DELETE":
            tpr_tree.delete(object_.id, object_.get_coordinates(t_now))
            del live_objects[object_.id]
        elif operation == "QUERY":
            tree_hits = set(
                tpr_tree.intersection(object_.get_coordinates()))

            # Brute force: test every live object against the query window.
            # getXY yields the object's (x, y) position at a given time, so
            # the two calls give the endpoints of its trajectory segment.
            brute_hits = {
                candidate.id
                for candidate in live_objects.values()
                if intersects(
                    *candidate.getXY(object_.start_time),   # segment start
                    *candidate.getXY(object_.end_time),     # segment end
                    object_.x, object_.y, object_.dx, object_.dy)  # rect
            }

            # Tree should match brute force approach
            assert tree_hits == brute_hits
def demo_delete():
    """Demonstrate index deletion.

    Inserts every country's polygons into an R-tree, verifies that a random
    point generated inside each polygon finds its country, deletes one
    country's polygons, then re-runs the check and prints any points that no
    longer match.
    """
    seed = 1  # Seed for random points
    countries = get_countries()
    target_country_id = 170  # United States of America
    target_uuids = []  # Polygons' ids to remove from the index

    properties = Property()
    # properties.writethrough = True
    # properties.leaf_capacity = 1000
    # properties.fill_factor = 0.5
    index = Index(properties=properties)

    points_per_polygon = 1
    sample_points = []

    # Inserts countries data to the index
    for country_id, (country_name, geometry) in enumerate(countries):
        for polygon in get_polygons(geometry):
            polygon_uuid = uuid.uuid1().int
            index.insert(polygon_uuid, polygon.bounds, country_name)
            if country_id == target_country_id:
                # Saves index ids of the polygon to be removed later
                target_uuids.append(polygon_uuid)
            # Generates random points in every polygon and saves them
            random_points = gen_random_point(points_per_polygon, polygon, seed)
            sample_points.append((country_name, random_points))

    # Checks every generated point has matches
    for country_name, country_points in sample_points:
        for point in country_points:
            hits = list(index.intersection(point.bounds, objects=True))
            assert any(hit.object == country_name for hit in hits)

    # Remove geometry
    geometry = countries[target_country_id][1]
    for polygon_idx, polygon in enumerate(get_polygons(geometry)):
        index.delete(target_uuids[polygon_idx], polygon.bounds)

    # Checks (again) if every generated point has matches
    points_missing = []
    for country_name, country_points in sample_points:
        for point in country_points:
            hits = list(index.intersection(point.bounds, objects=True))
            # Save any point without matches
            if not any(hit.object == country_name for hit in hits):
                points_missing.append(str(point) + " - " + country_name)

    # Print missing points
    for point in points_missing:
        print(point)
def local_search(points, bounding_box, iterations):
    """Greedy local search for label placement.

    Repeatedly (for ``iterations`` rounds) re-selects, for every labeled
    point, the label candidate with the lowest penalty, where the penalty is
    a position preference plus overlap with every other indexed item
    (labels, points and the bounding-box border).

    :param points: iterable of point objects; those with a truthy ``text``
        attribute carry a label to place
    :param bounding_box: object exposing ``border_config`` pseudo-items
    :param iterations: number of improvement rounds to run
    """
    labeled_points = [p for p in points if p.text]

    # Labels, points and border pseudo-items all live in one list + one
    # spatial index so overlap penalties can be computed uniformly.
    items = []
    items.extend([p.label for p in labeled_points])
    items.extend(points)
    items.extend(bounding_box.border_config)

    idx = Index()
    for i, item in enumerate(items):
        item.index = i
        idx.insert(item.index, item.box)

    for _ in range(iterations):
        for lp in labeled_points:
            best_candidate = None
            min_penalty = None
            for lc1 in lp.label_candidates:
                penalty = POSITION_WEIGHT * lc1.position
                # Check overlap with other labels and points
                for item_id in idx.intersection(lc1.box):
                    item = items[item_id]
                    # A candidate never conflicts with its own anchor point
                    if hasattr(item, "point") and lc1.point == item.point:
                        continue
                    penalty += item.overlap(lc1)
                if min_penalty is None or penalty < min_penalty:
                    min_penalty = penalty
                    best_candidate = lc1

            # Remove the old label from the index
            idx.delete(lp.label.index, lp.label.box)
            # Select the new label
            best_candidate.select()
            # Add the new label to the index and item list.
            # BUG FIX: the label's .index attribute must track the id it is
            # inserted under; previously it kept its stale value, so the next
            # round's idx.delete() removed a no-longer-existing entry and the
            # newly placed label was never deleted from the index.
            lp.label.index = len(items)
            idx.insert(lp.label.index, lp.label.box)
            items.append(lp.label)
class AdjacencyVersion(object):
    """Cluster adjacency helper combining an R-tree over cluster bounding
    boxes with per-attribute ``NearestNeighbors`` indexes on discrete
    features.

    Fixes applied relative to the original:
    - ``search_rtree`` had unreachable statements after its ``return``;
      the dead code is removed.
    - Python-2-only builtins (``xrange``, ``dict.iteritems``) replaced with
      their Python 3 equivalents (the rest of this file is Python 3).
    - A byte-for-byte duplicate of this entire class, kept alive inside a
      dead triple-quoted string literal, has been deleted.
    """

    def __init__(self, feature_mapper):
        # self.partitions_complete = partitions_complete
        self.cid = 0
        self.disc_idxs = {}  # attribute name -> fitted NearestNeighbors
        self.feature_mapper = feature_mapper
        self.radius = .15
        self.metric = 'hamming'
        self._rtree = None  # internal datastructure, built lazily
        self._ndim = None
        self.clusters = []
        self.id2c = dict()
        self.c2id = dict()

    def to_json(self):
        """Serialize state to a JSON string (the R-tree is not serialized)."""
        data = {
            'clusters': [c and c.__dict__ or None for c in self.clusters],
            'id2c': [(key, c.__dict__) for key, c in self.id2c.items()],
            'c2id': [(c.__dict__, val) for c, val in self.c2id.items()],
            'cid': self.cid,
            '_ndim': self._ndim,
            '_rtreename': 'BLAH'
        }
        return json.dumps(data)

    def from_json(self, encoded):
        """Restore state from :meth:`to_json` output; the R-tree is rebuilt
        lazily on the next spatial operation."""
        data = json.loads(encoded)
        self.clusters = [
            c and Cluster.from_dict(c) or None for c in data['clusters']
        ]
        self.id2c = dict([(key, Cluster.from_dict(val))
                          for key, val in data['id2c']])
        self.c2id = dict([(Cluster.from_dict(key), val)
                          for key, val in data['c2id']])
        self.cid = data['cid']
        self._ndim = data['_ndim']
        self._rtree = None

    def setup_rtree(self, ndim, clusters=None):
        """Create (once) and return the spatial index for ``ndim`` dims.

        For zero-dimensional data a stub object is used whose
        ``intersection`` simply reports every known cluster index.
        """
        if self._rtree:
            return self._rtree

        self._ndim = ndim
        if not ndim:
            class k(object):
                def __init__(self, graph):
                    self.graph = graph

                def insert(self, *args, **kwargs):
                    pass

                def delete(self, *args, **kwargs):
                    pass

                def intersection(self, *args, **kwargs):
                    # was xrange() -- Python 2 only
                    return range(len(self.graph.clusters))

            self._rtree = k(self)
            return self._rtree

        p = RProp()
        p.dimension = max(2, ndim)
        p.dat_extension = 'data'
        p.idx_extension = 'index'

        if clusters:
            # Bulk-load: slightly enlarged boxes so touching clusters overlap
            gen_func = ((i, self.bbox_rtree(c, enlarge=0.005), None)
                        for i, c in enumerate(clusters))
            self._rtree = RTree(gen_func, properties=p)
        else:
            self._rtree = RTree(properties=p)
        return self._rtree

    def bbox_rtree(self, cluster, enlarge=0.):
        """Return the cluster's bbox as a flat [lower..., higher...] list,
        padded to 2-D for 1-D data and optionally enlarged per dimension by
        ``enlarge`` times that attribute's value range."""
        cols = cluster.cols
        bbox = cluster.bbox
        lower, higher = map(list, bbox)
        if self._ndim == 1:
            lower.append(0)
            higher.append(1)
        if enlarge != 0:
            for idx, col in enumerate(cols):
                rng = enlarge * self.feature_mapper.ranges[col]
                lower[idx] -= rng
                higher[idx] += rng
        bbox = lower + higher
        return bbox

    def insert_rtree(self, idx, cluster):
        """Insert ``cluster`` under id ``idx``; returns the cluster."""
        self.setup_rtree(len(cluster.bbox[0]))
        self._rtree.insert(idx, self.bbox_rtree(cluster))
        return cluster

    def remove_rtree(self, idx, cluster):
        """Delete ``cluster``'s entry under id ``idx``; returns the cluster."""
        self.setup_rtree(len(cluster.bbox[0]))
        self._rtree.delete(idx, self.bbox_rtree(cluster))
        return cluster

    def search_rtree(self, cluster):
        """Return ids of clusters whose (slightly enlarged) boxes intersect
        ``cluster``'s box.

        BUG FIX: the original had two statements after this return that could
        never execute; they are removed.
        """
        self.setup_rtree(len(cluster.bbox[0]))
        bbox = self.bbox_rtree(cluster, enlarge=0.01)
        return self._rtree.intersection(bbox)

    def bulk_init(self, clusters):
        """Initialize from a full cluster list: bulk-load the R-tree, assign
        ids, and fit one radius-NN index per discrete attribute."""
        if not clusters:
            return

        self.setup_rtree(len(clusters[0].bbox[0]), clusters)
        self.clusters = clusters
        for cid, c in enumerate(clusters):
            self.id2c[cid] = c
            self.c2id[c] = cid

        for dim in self.feature_mapper.attrs:
            Xs = [self.feature_mapper(c, dim) for c in clusters]
            idx = NearestNeighbors(radius=self.radius,
                                   algorithm='ball_tree',
                                   metric=self.metric)
            self.disc_idxs[dim] = idx
            self.disc_idxs[dim].fit(np.array(Xs))

    def contains(self, cluster):
        return cluster in self.c2id

    def remove(self, cluster):
        """Remove a known cluster from all maps; returns True if removed."""
        if cluster in self.c2id:
            cid = self.c2id[cluster]
            self.remove_rtree(cid, cluster)
            del self.c2id[cluster]
            del self.id2c[cid]
            # Slot is tombstoned so other ids stay valid
            self.clusters[cid] = None
            return True
        return False

    def neighbors(self, cluster):
        """Return clusters close to ``cluster`` on every discrete attribute
        AND spatially intersecting it (intersection of all candidate sets)."""
        ret = None
        for name in cluster.discretes:  # was iteritems() -- Python 2 only
            if name not in self.disc_idxs:
                return []
            vect = self.feature_mapper(cluster, name)
            index = self.disc_idxs[name]
            dists, idxs = index.radius_neighbors(vect, radius=self.radius)
            idxs = set(idxs[0].tolist())
            if ret is None:
                ret = idxs
            else:
                ret.intersection_update(idxs)
                # ret.update(idxs)

        if not ret:
            return []

        idxs = self.search_rtree(cluster)
        if ret is None:
            ret = set(idxs)
        else:
            ret.intersection_update(set(idxs))
        # Drop tombstoned (None) clusters; return a concrete list
        return [self.clusters[i] for i in ret if self.clusters[i]]
class AdjacencyGraph(object):
    """Explicit adjacency graph over clusters, backed by an R-tree for
    spatial candidate search.

    Fixes applied relative to the original:
    - ``to_json`` called ``self.graph.itemsiter()`` — not a dict method in
      any Python version — so serialization always raised AttributeError;
      it now uses ``items()``.
    - ``bbox_rtree`` guarded enlargement with ``enlarge != 1.`` although the
      default is ``0.`` (and the sibling class checks against 0); the guard
      is now ``enlarge != 0.`` — behaviorally identical for every existing
      call site (0. and 0.00001).
    - Python-2-only ``xrange`` and lazy ``map`` replaced with Python 3
      equivalents.
    - ``remove`` used a bare ``except: pdb.set_trace()``; it now uses
      ``set.discard`` which is a safe no-op when the edge is absent.
    """

    def __init__(self, clusters, partitions_complete=True):
        self.partitions_complete = partitions_complete
        self.graph = defaultdict(set)  # cluster -> set of adjacent clusters
        self.cid = 0
        self.clusters = []
        self.id2c = dict()
        self.c2id = dict()
        self._rtree = None  # internal datastructure, built lazily
        self._ndim = None
        self.bulk_init(clusters)

    def to_json(self):
        """Serialize state to a JSON string (the R-tree is not serialized)."""
        data = {
            'clusters': [c and c.__dict__ or None for c in self.clusters],
            'id2c': [(key, c.__dict__) for key, c in self.id2c.items()],
            'c2id': [(c.__dict__, val) for c, val in self.c2id.items()],
            # BUG FIX: was self.graph.itemsiter(), which does not exist
            'graph': [(key.__dict__, [val.__dict__ for val in vals])
                      for key, vals in self.graph.items()],
            'cid': self.cid,
            '_ndim': self._ndim,
            '_rtreename': 'BLAH'
        }
        return json.dumps(data)

    def from_json(self, encoded):
        """Restore state from :meth:`to_json` output; the R-tree is rebuilt
        lazily on the next spatial operation."""
        data = json.loads(encoded)
        self.clusters = [c and Cluster.from_dict(c) or None
                         for c in data['clusters']]
        self.id2c = dict([(key, Cluster.from_dict(val))
                          for key, val in data['id2c']])
        self.c2id = dict([(Cluster.from_dict(key), val)
                          for key, val in data['c2id']])
        # Materialize neighbour lists (py2 map() returned a list; py3 must
        # build one explicitly)
        self.graph = dict([(Cluster.from_dict(key),
                            [Cluster.from_dict(v) for v in vals])
                           for key, vals in data['graph']])
        self.cid = data['cid']
        self._ndim = data['_ndim']
        self._rtree = None

    def setup_rtree(self, ndim, clusters=None):
        """Create (once) and return the spatial index for ``ndim`` dims.

        For zero-dimensional data a stub object is used whose
        ``intersection`` simply reports every known cluster index.
        """
        if self._rtree:
            return self._rtree

        self._ndim = ndim
        if not ndim:
            class k(object):
                def __init__(self, graph):
                    self.graph = graph

                def insert(self, *args, **kwargs):
                    pass

                def delete(self, *args, **kwargs):
                    pass

                def intersection(self, *args, **kwargs):
                    # was xrange() -- Python 2 only
                    return range(len(self.graph.clusters))

            self._rtree = k(self)
            return self._rtree

        p = RProp()
        p.dimension = max(2, ndim)
        p.dat_extension = 'data'
        p.idx_extension = 'index'

        if clusters:
            gen_func = ((i, self.bbox_rtree(c, enlarge=0.00001), None)
                        for i, c in enumerate(clusters))
            self._rtree = RTree(gen_func, properties=p)
        else:
            self._rtree = RTree(properties=p)
        return self._rtree

    def bbox_rtree(self, cluster, enlarge=0.):
        """Return the cluster's bbox as a flat [lower..., higher...] list,
        padded to 2-D for 1-D data and optionally enlarged by ``enlarge``
        on every dimension."""
        bbox = cluster.bbox
        lower, higher = map(list, bbox)
        if self._ndim == 1:
            lower.append(0)
            higher.append(1)
        # BUG FIX: was `enlarge != 1.` (default is 0., so enlargement by
        # exactly 1.0 would silently be skipped)
        if enlarge != 0.:
            lower = [v - enlarge for v in lower]
            higher = [v + enlarge for v in higher]
        bbox = lower + higher
        return bbox

    def insert_rtree(self, idx, cluster):
        """Insert ``cluster`` under id ``idx``; returns the cluster."""
        self.setup_rtree(len(cluster.bbox[0]))
        self._rtree.insert(idx, self.bbox_rtree(cluster))
        return cluster

    def remove_rtree(self, idx, cluster):
        """Delete ``cluster``'s entry under id ``idx``; returns the cluster."""
        self.setup_rtree(len(cluster.bbox[0]))
        self._rtree.delete(idx, self.bbox_rtree(cluster))
        return cluster

    def search_rtree(self, cluster):
        """Return live clusters whose (slightly enlarged) boxes intersect
        ``cluster``'s box."""
        self.setup_rtree(len(cluster.bbox[0]))
        bbox = self.bbox_rtree(cluster, enlarge=0.00001)
        res = [self.clusters[idx] for idx in self._rtree.intersection(bbox)]
        return [c for c in res if c]

    def bulk_init(self, clusters):
        """Load all clusters, then build adjacency edges between spatially
        intersecting, mutually adjacent pairs."""
        if clusters:
            self.setup_rtree(len(clusters[0].bbox[0]), clusters)

        self.clusters.extend(clusters)
        for cid, c in enumerate(clusters):
            self.id2c[cid] = c
            self.c2id[c] = cid

        for idx, c in enumerate(clusters):
            for n in self.search_rtree(c):
                # Only consider each unordered pair once
                if self.c2id[n] <= idx:
                    continue
                # Skip fully-contained clusters with matching discretes
                if c.discretes_contains(n) and box_completely_contained(
                        c.bbox, n.bbox):
                    continue
                if not c.adjacent(n, 0.8):
                    continue
                self.graph[c].add(n)
                self.graph[n].add(c)

    def insert(self, cluster):
        """Add a new cluster and connect it to adjacent/overlapping ones."""
        if cluster in self.graph:
            return

        self.graph[cluster] = set()
        # for o in self.search_rtree(cluster):
        for o in self.graph.keys():
            if cluster == o:
                continue
            if cluster.adjacent(o, 0.8) or (
                    volume(intersection_box(cluster.bbox, o.bbox)) > 0
                    and not cluster.contains(o)):
                self.graph[cluster].add(o)
                self.graph[o].add(cluster)

        cid = len(self.clusters)
        self.clusters.append(cluster)
        self.id2c[cid] = cluster
        self.c2id[cluster] = cid
        self.insert_rtree(cid, cluster)

    def remove(self, cluster):
        """Remove a cluster and all edges referencing it."""
        if cluster not in self.graph:
            return

        # BUG FIX: was `try: ... set.remove ... except: pdb.set_trace()`;
        # discard() is a no-op when the back-edge is missing.
        for neigh in self.graph[cluster]:
            if not neigh == cluster:
                self.graph[neigh].discard(cluster)
        del self.graph[cluster]

        cid = self.c2id[cluster]
        self.remove_rtree(cid, cluster)
        del self.c2id[cluster]
        del self.id2c[cid]
        # Slot is tombstoned so other ids stay valid
        self.clusters[cid] = None

    def neighbors(self, cluster):
        """Return neighbours of ``cluster``: its stored edge set if known,
        otherwise an on-the-fly spatial/adjacency search."""
        if not self.partitions_complete:
            # Partitions incomplete: everything live is a potential neighbour
            return [c for c in self.clusters if c]

        if cluster in self.graph:
            return self.graph[cluster]

        ret = set()
        intersects = self.search_rtree(cluster)
        for key in filter(cluster.adjacent, intersects):
            if box_completely_contained(key.bbox, cluster.bbox):
                continue
            ret.update(self.graph[key])
        return ret
class DyClee:
    """
    Implementation roughly as per https://doi.org/10.1016/j.patcog.2019.05.024.

    Streaming clustering in two stages: a per-element distance step that
    grows/creates microclusters, and a periodic density step that groups
    dense microclusters (with semi-dense boundaries) into clusters.
    """

    def __init__(self, context: DyCleeContext):
        self.context = context
        # Density partitions over all known microclusters.
        # NOTE(review): ``Set`` appears to be a project ordered-set type
        # (ordering comments below rely on it) — confirm, builtin set would
        # not preserve the sorted order.
        self.dense_µclusters: Set[MicroCluster] = Set()
        self.semidense_µclusters: Set[MicroCluster] = Set()
        self.outlier_µclusters: Set[MicroCluster] = Set()
        self.long_term_memory: Set[MicroCluster] = Set()
        self.eliminated: Set[MicroCluster] = Set()
        # Monotonic counters for microcluster ids and class labels
        self.next_µcluster_index: int = 0
        self.next_class_label: int = 0
        # Step bookkeeping for the periodic partitioning/density passes
        self.n_steps: int = 0
        self.last_partitioning_step: int = 0
        self.last_density_step: int = 0
        if self.context.maintain_rtree:
            p = RTreeProperty(dimension=self.context.n_features)
            self.rtree = RTreeIndex(properties=p)
            # This mapping is used to retrieve microcluster objects from their
            # hashes stored with their locations in the R*-tree
            self.µcluster_map: Optional[dict[int, MicroCluster]] = {}
        else:
            self.rtree = None
            self.µcluster_map = None

    @property
    def active_µclusters(self) -> Set[MicroCluster]:
        # Dense + semi-dense participate in clustering ("active")
        return self.dense_µclusters | self.semidense_µclusters

    @property
    def all_µclusters(self) -> Set[MicroCluster]:
        # Every microcluster currently tracked (not the eliminated ones)
        return self.active_µclusters | self.outlier_µclusters | self.long_term_memory

    def get_next_µcluster_index(self) -> int:
        """Return a fresh, unique microcluster index."""
        index = self.next_µcluster_index
        self.next_µcluster_index += 1
        return index

    def get_next_class_label(self) -> int:
        """Return a fresh, unique class label."""
        label = self.next_class_label
        self.next_class_label += 1
        return label

    def update_density_partitions(self, time: Timestamp) -> Set[MicroCluster]:
        """Re-partition all microclusters into dense / semi-dense / outlier /
        long-term-memory sets based on current densities; returns the set of
        eliminated microclusters."""
        densities = np.array(
            [µcluster.density(time) for µcluster in self.all_µclusters])
        mean_density = np.mean(densities)
        median_density = np.median(densities)
        dense: Set[MicroCluster] = Set()
        semidense: Set[MicroCluster] = Set()
        outliers: Set[MicroCluster] = Set()
        memory: Set[MicroCluster] = Set()
        eliminated: Set[MicroCluster] = Set()
        for µcluster in self.all_µclusters:
            density = µcluster.density(time)
            if mean_density <= density >= median_density:
                # Any may become dense (density at or above both mean and median)
                dense.add(µcluster)
                µcluster.once_dense = True
            elif (µcluster in self.dense_µclusters
                  or µcluster in self.semidense_µclusters
                  or µcluster in self.outlier_µclusters) and (
                      density >= mean_density) != (density >= median_density):
                # Exactly one of the two thresholds is met:
                # dense and outliers may become semi-dense,
                # semi-dense may stay semi-dense
                semidense.add(µcluster)
            elif ((µcluster in self.dense_µclusters
                   or µcluster in self.semidense_µclusters)
                  and mean_density > density < median_density) or (
                      µcluster in self.outlier_µclusters
                      and density >= self.context.elimination_threshold):
                # Dense and semi-dense may become outliers
                # Outliers may stay outliers
                outliers.add(µcluster)
            elif (self.context.long_term_memory
                  and µcluster in self.outlier_µclusters
                  and µcluster.once_dense):
                # Outliers may be put into long-term memory
                memory.add(µcluster)
            else:
                # If none of the conditions are met, the microcluster is eliminated
                eliminated.add(µcluster)
                if self.context.maintain_rtree:
                    # Remove microcluster from R*-tree
                    self.rtree.delete(hash(µcluster), µcluster.bounding_box)
        # Store the final sets, sorting by index for predictable ordering
        self.dense_µclusters = Set(sorted(dense, key=lambda µ: µ.index))
        self.semidense_µclusters = Set(sorted(semidense, key=lambda µ: µ.index))
        self.outlier_µclusters = Set(sorted(outliers, key=lambda µ: µ.index))
        self.long_term_memory = Set(sorted(memory, key=lambda µ: µ.index))
        if self.context.store_elements:
            # Keep track of eliminated microclusters (to not lose elements)
            self.eliminated |= eliminated
        return eliminated

    def distance_step(self, element: Element, time: Timestamp) -> MicroCluster:
        """Assign ``element`` to the closest reachable microcluster (creating
        a new outlier microcluster if none is reachable) and return it."""
        if self.context.update_ranges:
            self.context.update_feature_ranges(element)
        if not self.all_µclusters:
            # Create new microcluster (first element ever seen)
            µcluster = MicroCluster(element,
                                    time,
                                    context=self.context,
                                    index=self.get_next_µcluster_index())
            self.outlier_µclusters.add(µcluster)
            if self.context.maintain_rtree:
                # Add microcluster to R*-tree
                self.µcluster_map[hash(µcluster)] = µcluster
                self.rtree.insert(hash(µcluster), µcluster.bounding_box)
            return µcluster
        else:
            closest: Optional[MicroCluster] = None
            if self.context.distance_index == SpatialIndexMethod.RTREE:
                # The R*-tree searches all microclusters regardless of
                # precedence, so we need to filter by priority after the
                # index search.
                # Find all reachable microclusters
                matches: Set[MicroCluster] = Set([
                    self.µcluster_map[hash_]
                    for hash_ in self.rtree.intersection((*element, *element))
                ])
                min_dist = None
                for candidate_µclusters in (self.active_µclusters,
                                            self.outlier_µclusters,
                                            self.long_term_memory):
                    # First match active microclusters, then others
                    # NOTE(review): unlike the KDTREE/brute-force branch below,
                    # this loop has no `break` once a match is found in a
                    # higher-priority set, so a closer lower-priority
                    # microcluster can still win — confirm this is intended.
                    for µcluster in matches & candidate_µclusters:
                        dist = µcluster.distance(element)
                        if (closest is None or dist < min_dist
                                or (dist == min_dist
                                    and µcluster.density(time)
                                    > closest.density(time))):
                            closest = µcluster
                            min_dist = dist
            else:
                for candidate_µclusters in (self.active_µclusters,
                                            self.outlier_µclusters,
                                            self.long_term_memory):
                    # First search actives, then others for reachable
                    # microclusters
                    if not candidate_µclusters:
                        continue
                    if self.context.distance_index == SpatialIndexMethod.KDTREE:
                        # Ensure predictable order for indexability
                        candidate_µclusters = list(candidate_µclusters)
                        candidate_centroids: np.ndarray = np.row_stack([
                            µcluster.centroid
                            for µcluster in candidate_µclusters
                        ])
                        # Find potentially reachable microclusters (using
                        # L-inf norm)
                        idcs, = KDTree(
                            candidate_centroids, p=np.inf).query_radius(
                                np.reshape(element, (1, -1)),
                                self.context.potentially_reachable_radius)
                        if not len(idcs):
                            continue
                        min_dist = None
                        # Find closest (L-1 norm) microcluster among the
                        # reachable ones
                        for i in idcs:
                            µcluster = candidate_µclusters[i]
                            if not µcluster.is_reachable(element):
                                continue
                            dist = µcluster.distance(element)
                            # Higher density is tie-breaker in case of equal
                            # distances
                            if (closest is None or dist < min_dist
                                    or (dist == min_dist
                                        and µcluster.density(time)
                                        > closest.density(time))):
                                closest = µcluster
                                min_dist = dist
                    else:
                        # Brute force
                        min_dist = None
                        for µcluster in candidate_µclusters:
                            if not µcluster.is_reachable(element):
                                continue
                            dist = µcluster.distance(element)
                            if (closest is None or dist < min_dist
                                    or (dist == min_dist
                                        and µcluster.density(time)
                                        > closest.density(time))):
                                closest = µcluster
                                min_dist = dist
                    if closest is not None:
                        # Match found, no need to check next set
                        break
            if closest is not None:
                if self.context.maintain_rtree:
                    # Remove microcluster from R*-tree (its bounding box is
                    # about to change)
                    self.rtree.delete(hash(closest), closest.bounding_box)
                # Add element to closest microcluster
                closest.add(element, time)
                if self.context.maintain_rtree:
                    # Add modified microcluster to R*-tree
                    self.rtree.insert(hash(closest), closest.bounding_box)
                return closest
            else:
                # Create new microcluster (nothing reachable)
                µcluster = MicroCluster(element,
                                        time,
                                        context=self.context,
                                        index=self.get_next_µcluster_index())
                self.outlier_µclusters.add(µcluster)
                if self.context.maintain_rtree:
                    # Add microcluster to R*-tree
                    self.µcluster_map[hash(µcluster)] = µcluster
                    self.rtree.insert(hash(µcluster), µcluster.bounding_box)
                return µcluster

    def global_density_step(
            self, time: Timestamp) -> tuple[list[Cluster], Set[MicroCluster]]:
        """Group dense microclusters (plus semi-dense boundaries) into
        clusters via flood-fill over directly-connected neighbours; returns
        (clusters, microclusters left unclustered)."""
        clusters: list[Cluster] = []
        seen: Set[MicroCluster] = Set()
        for µcluster in self.dense_µclusters:
            if µcluster in seen:
                continue
            seen.add(µcluster)
            if µcluster.label is None:
                µcluster.label = self.get_next_class_label()
            cluster = Cluster(µcluster, time)
            clusters.append(cluster)
            # Get dense and semi-dense directly connected neighbours
            connected = µcluster.get_neighbours(
                (self.dense_µclusters | self.semidense_µclusters) - seen,
                rtree_index=self.rtree,
                µcluster_map=self.µcluster_map)
            while connected:
                neighbour = connected.pop()
                if neighbour in seen:
                    continue
                seen.add(neighbour)
                # Outlier microclusters are ignored
                if neighbour in self.outlier_µclusters:
                    continue
                # Dense and semi-dense microclusters become part of the cluster
                neighbour.label = µcluster.label
                cluster.add(neighbour, time)
                # Semi-dense neighbours may only form the boundary
                if neighbour not in self.dense_µclusters:
                    continue
                # Get neighbour's dense and semi-dense directly connected
                # neighbours and add to set of microclusters connected to the
                # parent
                connected |= neighbour.get_neighbours(
                    (self.dense_µclusters | self.semidense_µclusters) - seen,
                    rtree_index=self.rtree,
                    µcluster_map=self.µcluster_map)
        # Find all microclusters that were not grouped into a cluster
        unclustered = self.all_µclusters
        for cluster in clusters:
            unclustered -= cluster.µclusters
        return clusters, unclustered

    def local_density_step(
            self, time: Timestamp) -> tuple[list[Cluster], Set[MicroCluster]]:
        # Multi-density (local) analysis from the paper is not implemented yet
        raise NotImplementedError("TODO")

    def density_step(
            self, time: Timestamp) -> tuple[list[Cluster], Set[MicroCluster]]:
        """Dispatch to local (multi-density) or global density analysis."""
        if self.context.multi_density:
            return self.local_density_step(time)
        else:
            return self.global_density_step(time)

    def step(
        self,
        element: Element,
        time: Timestamp,
        skip_density_step: bool = False
    ) -> tuple[MicroCluster, Optional[list[Cluster]],
               Optional[Set[MicroCluster]], Optional[Set[MicroCluster]]]:
        """Process one element: distance step always; partitioning and
        density steps only when their configured intervals have elapsed.
        Returns (µcluster, clusters-or-None, unclustered-or-None,
        eliminated-or-None)."""
        self.n_steps += 1
        µcluster = self.distance_step(element, time)
        if (self.n_steps >=
                self.last_partitioning_step + self.context.partitioning_interval):
            eliminated = self.update_density_partitions(time)
            self.last_partitioning_step = self.n_steps
        else:
            eliminated = None
        if (not skip_density_step and self.n_steps >=
                self.last_density_step + self.context.density_interval):
            clusters, unclustered = self.density_step(time)
            self.last_density_step = self.n_steps
        else:
            clusters = None
            unclustered = None
        return µcluster, clusters, unclustered, eliminated

    def run(self,
            elements: Iterable[Element],
            times: Optional[Iterable[Timestamp]] = None,
            progress: bool = True) -> list[Cluster]:
        """Run the full pipeline over ``elements`` (density step deferred to
        the end), returning the final clusters.

        NOTE(review): raises NameError/UnboundLocalError on an empty
        ``elements`` iterable, since ``time`` is only bound inside the loop.
        """
        if progress and tqdm is not None:
            elements = tqdm(elements)
        if times is None:
            # Default to 0, 1, 2, ... timestamps
            times = count()
        for element, time in zip(elements, times):
            self.step(element, time, skip_density_step=True)
        clusters, _ = self.density_step(time)
        return clusters