def __or__(self, x):
    """Return a new result object combining self and x.

    The score mappings are merged with an (unweighted) weightedUnion and
    the word collections with union; the containing index is carried over.
    """
    # weightedUnion returns a (weight, mapping) pair; only the mapping is
    # needed here.  The original had a second, unreachable return statement
    # after this one (referencing an undefined name) — removed.
    return self.__class__(
        weightedUnion(self._dict, x._dict)[1],
        union(self._words, x._words),
        self._index,
    )
def _trivial(L):
    """Union of a degenerate pair list.

    L is empty or has only one (mapping, weight) pair.  If there is a
    pair, the mapping may still need to be multiplied by its weight.
    """
    assert len(L) <= 1
    if not L:
        return IIBucket()
    [(mapping, weight)] = L
    if weight == 1:
        return mapping
    # Apply the weight by unioning with an empty bucket.
    dummy, scaled = weightedUnion(IIBucket(), mapping, 0, weight)
    return scaled
def _trivial(l_):
    """Handle the degenerate union cases: zero or one (mapping, weight) pair."""
    assert len(l_) <= 1
    if not l_:
        # No pairs at all: the union is an empty bucket.
        return IIBucket()
    [(bucket, wt)] = l_
    if wt != 1:
        # Multiply the mapping by its weight via a union with an empty bucket.
        ignored, bucket = weightedUnion(IIBucket(), bucket, 0, wt)
    return bucket
def mass_weightedUnion(L):
    """A list of (mapping, weight) pairs -> their weightedUnion IIBucket."""
    if len(L) < 2:
        return _trivial(L)
    # Balance unions as closely as possible, smallest to largest, by
    # keeping the pending pairs in a size-keyed priority structure.
    queue = NBest(len(L))
    for mapping, wt in L:
        queue.add((mapping, wt), len(mapping))
    while len(queue) > 1:
        # Merge the two smallest so far, and feed the result back in.
        (a, wa), _size = queue.pop_smallest()
        (b, wb), _size = queue.pop_smallest()
        _w, merged = weightedUnion(a, b, wa, wb)
        queue.add((merged, 1), len(merged))
    (result, _wt), _size = queue.pop_smallest()
    return result
def mass_weightedUnion(l_):
    """A list of (mapping, weight) pairs -> their weightedUnion IIBucket."""
    if len(l_) < 2:
        return _trivial(l_)
    # Keep partial unions in a size-ordered pool so the two smallest
    # mappings are always merged next (keeps the union tree balanced).
    pending = NBest(len(l_))
    for mapping, weight in l_:
        pending.add((mapping, weight), len(mapping))
    while len(pending) > 1:
        (left, lw), ignored = pending.pop_smallest()
        (right, rw), ignored = pending.pop_smallest()
        ignored, combined = weightedUnion(left, right, lw, rw)
        # The combined mapping re-enters the pool with unit weight.
        pending.add((combined, 1), len(combined))
    (result, weight), ignored = pending.pop_smallest()
    return result
def __or__(self, x):
    """Combine two results: union the mappings and words, share the index."""
    # weightedUnion returns (weight, mapping); keep only the mapping.
    merged_dict = weightedUnion(self._dict, x._dict)[1]
    merged_words = union(self._words, x._words)
    return self.__class__(merged_dict, merged_words, self._index)
def __or__(self, x):
    """Return a new result object combining self and x.

    Fix: weightedUnion returns a (weight, mapping) pair, so the mapping
    must be extracted with [1] before being passed to the constructor —
    this variant passed the whole tuple, unlike the sibling __or__
    implementations in this file.
    """
    return self.__class__(
        weightedUnion(self._dict, x._dict)[1],
        union(self._words, x._words),
        self._index,
    )
def getClusters(catalog_tool, filters):
    """Cluster the catalog results that fall inside the current map frame.

    catalog_tool -- catalog wrapper whose inner catalog holds the
        'geo_latitude' / 'geo_longitude' indexes.
    filters -- list of catalog query dicts; each carries
        'geo_latitude'/'geo_longitude' range queries giving the map frame.
        NOTE: the geo queries of each filter are mutated in place to the
        discretized tile limits.

    Returns (centers, groups): the kmeans cluster centers and, for each
    cluster, the list of record ids grouped into it.
    """
    # The objects are searched for in the tile limits (to get the same
    # clusters every time).
    grid_size = 16  # geopoints'/clusters' density on map; also depends on map frame size

    # Unpack map limits from the first filter.
    if filters:
        lat_min = float(filters[0]['geo_latitude']['query'][0])
        lat_max = float(filters[0]['geo_latitude']['query'][1])
        lon_min = float(filters[0]['geo_longitude']['query'][0])
        lon_max = float(filters[0]['geo_longitude']['query'][1])
    else:
        # This should not happen.
        return [], []

    tlat_min, tlat_max, tlon_min, tlon_max = clusters.get_discretized_limits(
        lat_min, lat_max, lon_min, lon_max, grid_size)

    catalog = catalog_tool._catalog

    # Getting the inner indexes for lat and lon.
    lat_index = catalog.getIndex('geo_latitude')._index
    lon_index = catalog.getIndex('geo_longitude')._index

    # Adjust to cover results outside the frame but very close to the
    # margins — trying to fix cluster flickering near margins.

    # Applying the lat and lon indexes to get the rids.
    rs = None
    lat_set, lat_dict = _apply_index_with_range_dict_results(
        lat_index, Decimal(str(tlat_min)), Decimal(str(tlat_max)))
    w, rs = weightedIntersection(rs, lat_set)
    lon_set, lon_dict = _apply_index_with_range_dict_results(
        lon_index, Decimal(str(tlon_min)), Decimal(str(tlon_max)))
    w, rs = weightedIntersection(rs, lon_set)

    rs_final = None
    # OR the filters together, applying every index for each one.
    for f in filters:
        rs_f = rs
        # Adjust geo limits in filters to be consistent with the
        # discretized tile limits.
        f['geo_longitude']['query'] = (Decimal(str(tlon_min)), Decimal(str(tlon_max)))
        f['geo_latitude']['query'] = (Decimal(str(tlat_min)), Decimal(str(tlat_max)))
        # This code is from the search function in the catalog
        # implementation in Zope.
        for i in catalog.indexes.keys():
            index = catalog.getIndex(i)
            _apply_index = getattr(index, "_apply_index", None)
            if _apply_index is None:
                continue
            r = _apply_index(f)
            if r is not None:
                r, u = r
                w, rs_f = weightedIntersection(rs_f, r)
        w, rs_final = weightedUnion(rs_f, rs_final)

    r_list = list(rs_final)

    # Transform catalog records to cluster points.
    points = [
        clusters.Point(i, float(lat_dict[rid]), float(lon_dict[rid]))
        for i, rid in enumerate(r_list)
    ]
    centers, groups = clusters.kmeans(tlat_min, tlat_max, tlon_min, tlon_max,
                                      points, grid_size)

    # Transform group points back to rids.  Fix: use a list comprehension
    # instead of map() so the result is a list under Python 3 as well
    # (map() returns a lazy iterator there, not a list).
    for i in range(len(groups)):
        groups[i] = [r_list[p.id] for p in groups[i]]
    return centers, groups
def getClusters(catalog_tool, filters):
    """Cluster the catalog results that fall inside the current map frame.

    catalog_tool -- catalog wrapper whose inner catalog holds the
        'geo_latitude' / 'geo_longitude' indexes.
    filters -- list of catalog query dicts; each carries
        'geo_latitude'/'geo_longitude' range queries giving the map frame.
        NOTE: the geo queries of each filter are mutated in place to the
        discretized tile limits.

    Returns (centers, groups): the kmeans cluster centers and, for each
    cluster, the collection of record ids grouped into it.
    """
    # The objects are searched for in the tile limits (to get the same
    # clusters every time).
    grid_size = 12  # geopoints' and clusters' density on map / also depends on map frame size

    # Unpack map limits from the first filter.
    if filters:
        lat_min = float(filters[0]['geo_latitude']['query'][0])
        lat_max = float(filters[0]['geo_latitude']['query'][1])
        lon_min = float(filters[0]['geo_longitude']['query'][0])
        lon_max = float(filters[0]['geo_longitude']['query'][1])
    else:
        # This should not happen.
        return [], []

    tlat_min, tlat_max, tlon_min, tlon_max = clusters.get_discretized_limits(lat_min, lat_max, lon_min, lon_max, grid_size)

    catalog = catalog_tool._catalog

    # Getting the inner indexes for lat and lon.
    lat_index = catalog.getIndex('geo_latitude')._index
    lon_index = catalog.getIndex('geo_longitude')._index

    # Adjust to cover results outside frame, but very close to margins;
    # trying to fix cluster flickering near margins.

    # Applying the lat and lon indexes to get the rids.
    rs = None
    lat_set, lat_dict = _apply_index_with_range_dict_results(lat_index, Decimal(str(tlat_min)), Decimal(str(tlat_max)))
    w, rs = weightedIntersection(rs, lat_set)
    lon_set, lon_dict = _apply_index_with_range_dict_results(lon_index, Decimal(str(tlon_min)), Decimal(str(tlon_max)))
    w, rs = weightedIntersection(rs, lon_set)

    rs_final = None
    # OR the filters and apply the index for each one.
    for f in filters:
        rs_f = rs
        # Adjust geo limits in filters to be consistent with discretized
        # tile limits.
        f['geo_longitude']['query'] = (Decimal(str(tlon_min)), Decimal(str(tlon_max)))
        f['geo_latitude']['query'] = (Decimal(str(tlat_min)), Decimal(str(tlat_max)))
        # This code is from the search function in the catalog
        # implementation in Zope.
        for i in catalog.indexes.keys():
            index = catalog.getIndex(i)
            _apply_index = getattr(index, "_apply_index", None)
            if _apply_index is None:
                continue
            r = _apply_index(f)
            if r is not None:
                r, u = r
                w, rs_f = weightedIntersection(rs_f, r)
        w, rs_final = weightedUnion(rs_f, rs_final)

    r_list = list(rs_final)

    # Transform objects to points.
    points = []
    for i in range(len(r_list)):
        points.append(clusters.Point(i, float(lat_dict[r_list[i]]), float(lon_dict[r_list[i]])))
    centers, groups = clusters.kmeans(tlat_min, tlat_max, tlon_min, tlon_max, points, grid_size)

    # Transform group points to rids.
    # NOTE(review): under Python 3 this map() call yields a lazy iterator,
    # not a list — presumably this code targets Python 2; verify callers
    # if ported.
    for i in range(len(groups)):
        groups[i] = map(lambda p: r_list[p.id], groups[i])
    return centers, groups