Example #1
    def __or__(self, x):
        return self.__class__(
            weightedUnion(self._dict, x._dict)[1],
            union(self._words, x._words),
            self._index,
        )
Example #2
def _trivial(L):
    # L is empty or has only one (mapping, weight) pair.  If there is a
    # pair, we may still need to multiply the mapping by its weight.
    assert len(L) <= 1
    if len(L) == 0:
        return IIBucket()
    [(result, weight)] = L
    if weight != 1:
        dummy, result = weightedUnion(IIBucket(), result, 0, weight)
    return result
Example #3
def _trivial(l_):
    # l_ is empty or has only one (mapping, weight) pair. If there is a
    # pair, we may still need to multiply the mapping by its weight.
    assert len(l_) <= 1
    if len(l_) == 0:
        return IIBucket()
    [(result, weight)] = l_
    if weight != 1:
        dummy, result = weightedUnion(IIBucket(), result, 0, weight)
    return result
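
A quick illustrative sketch of the helper above (not part of the original listing): with a single (mapping, weight) pair, the weightedUnion against an empty IIBucket simply scales every score by the weight. It assumes _trivial and BTrees' IIBucket are in scope as in the original module; the docids and scores are made up.

from BTrees.IIBTree import IIBucket

b = IIBucket()
b.update({5: 2, 6: 1})   # docid -> score (illustrative)

print(sorted(_trivial([]).items()))         # expected: []  (empty result)
print(sorted(_trivial([(b, 3)]).items()))   # expected: [(5, 6), (6, 3)]  (scores scaled by 3)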
Example #4
def mass_weightedUnion(L):
    "A list of (mapping, weight) pairs -> their weightedUnion IIBucket."
    if len(L) < 2:
        return _trivial(L)
    # Balance unions as closely as possible, smallest to largest.
    merge = NBest(len(L))
    for x, weight in L:
        merge.add((x, weight), len(x))
    while len(merge) > 1:
        # Merge the two smallest so far, and add back to the queue.
        (x, wx), dummy = merge.pop_smallest()
        (y, wy), dummy = merge.pop_smallest()
        dummy, z = weightedUnion(x, y, wx, wy)
        merge.add((z, 1), len(z))
    (result, weight), dummy = merge.pop_smallest()
    return result
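
A minimal usage sketch for the balanced merge above (not part of the original listing). It assumes mass_weightedUnion, _trivial, and the NBest helper they use are in scope, along with BTrees' IIBucket and weightedUnion as in the original module; the docids and weights are made up.

from BTrees.IIBTree import IIBucket

a = IIBucket()
a.update({10: 1, 11: 2})   # docid -> score (illustrative)
b = IIBucket()
b.update({11: 3, 12: 1})

# Union with weights 2 and 1: shared docids get 2*score_a + 1*score_b.
combined = mass_weightedUnion([(a, 2), (b, 1)])
print(sorted(combined.items()))   # expected: [(10, 2), (11, 7), (12, 1)]

Merging the two smallest mappings first keeps every intermediate union small, which is the point of the NBest-based balancing in the function above.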
Example #5
def mass_weightedUnion(l_):
    "A list of (mapping, weight) pairs -> their weightedUnion IIBucket."
    if len(l_) < 2:
        return _trivial(l_)
    # Balance unions as closely as possible, smallest to largest.
    merge = NBest(len(l_))
    for x, weight in l_:
        merge.add((x, weight), len(x))
    while len(merge) > 1:
        # Merge the two smallest so far, and add back to the queue.
        (x, wx), dummy = merge.pop_smallest()
        (y, wy), dummy = merge.pop_smallest()
        dummy, z = weightedUnion(x, y, wx, wy)
        merge.add((z, 1), len(z))
    (result, weight), dummy = merge.pop_smallest()
    return result
Example #6
    def __or__(self, x):
        return self.__class__(
            weightedUnion(self._dict, x._dict)[1],
            union(self._words, x._words),
            self._index,
        )
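
To show how the operator is used, here is a self-contained sketch built around a hypothetical stand-in class (ResultStub is not the real Zope class; only the __or__ body is taken from the example above, everything else is illustrative).

from BTrees.IIBTree import IIBucket, IISet, union, weightedUnion

class ResultStub:
    # Hypothetical minimal container: _dict maps docid -> score,
    # _words is an IISet of matched word ids, _index is a back-reference.
    def __init__(self, d, words, index):
        self._dict = d
        self._words = words
        self._index = index

    def __or__(self, x):
        return self.__class__(
            weightedUnion(self._dict, x._dict)[1],
            union(self._words, x._words),
            self._index,
        )

a_scores = IIBucket()
a_scores.update({1: 5})
b_scores = IIBucket()
b_scores.update({1: 2, 2: 4})

a = ResultStub(a_scores, IISet([100]), index=None)
b = ResultStub(b_scores, IISet([101]), index=None)
c = a | b
print(sorted(c._dict.items()))   # expected: [(1, 7), (2, 4)]
print(list(c._words))            # expected: [100, 101]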
Example #7
    def __or__(self, x):
        return self.__class__(
            weightedUnion(self._dict, x._dict),
            union(self._words, x._words),
            self._index,
        )
Example #8
def getClusters(catalog_tool, filters):
    # objects are searched within the tile limits so that the same clusters are returned every time
    grid_size = 16  # density of geopoints and clusters on the map; also depends on the map frame size

    # unpack map limits
    if filters:
        lat_min = float(filters[0]['geo_latitude']['query'][0])
        lat_max = float(filters[0]['geo_latitude']['query'][1])

        lon_min = float(filters[0]['geo_longitude']['query'][0])
        lon_max = float(filters[0]['geo_longitude']['query'][1])
    else:  # this should not happen
        return [], []

    tlat_min, tlat_max, tlon_min, tlon_max = clusters.get_discretized_limits(
        lat_min, lat_max, lon_min, lon_max, grid_size)

    catalog = catalog_tool._catalog

    # getting the inner indexes for lat and lon
    lat_index = catalog.getIndex('geo_latitude')._index
    lon_index = catalog.getIndex('geo_longitude')._index

    # adjust to cover results outside frame, but very close to margins
    # trying to fix cluster flickering near margins

    # applying the lat and lon indexes to get the rids
    rs = None
    lat_set, lat_dict = _apply_index_with_range_dict_results(
        lat_index, Decimal(str(tlat_min)), Decimal(str(tlat_max)))
    w, rs = weightedIntersection(rs, lat_set)

    lon_set, lon_dict = _apply_index_with_range_dict_results(
        lon_index, Decimal(str(tlon_min)), Decimal(str(tlon_max)))
    w, rs = weightedIntersection(rs, lon_set)

    rs_final = None
    # OR the filters and apply the index for each one
    for f in filters:
        rs_f = rs

        # adjust geo limits in filters to be consistent with discretized tile limits
        f['geo_longitude']['query'] = (Decimal(str(tlon_min)),
                                       Decimal(str(tlon_max)))
        f['geo_latitude']['query'] = (Decimal(str(tlat_min)),
                                      Decimal(str(tlat_max)))

        # this code is adapted from the search function in Zope's catalog implementation
        for i in catalog.indexes.keys():
            index = catalog.getIndex(i)
            _apply_index = getattr(index, "_apply_index", None)
            if _apply_index is None:
                continue
            r = _apply_index(f)

            if r is not None:
                r, u = r
                w, rs_f = weightedIntersection(rs_f, r)

        w, rs_final = weightedUnion(rs_f, rs_final)

    r_list = list(rs_final)

    # transform objects to points
    points = []
    for i in range(len(r_list)):
        points.append(
            clusters.Point(i, float(lat_dict[r_list[i]]),
                           float(lon_dict[r_list[i]])))

    centers, groups = clusters.kmeans(tlat_min, tlat_max, tlon_min, tlon_max,
                                      points, grid_size)

    # transform group points to rids
    for i in range(len(groups)):
        groups[i] = [r_list[p.id] for p in groups[i]]

    return centers, groups
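
The chaining above works because BTrees' weighted set operations treat a None operand as "nothing yet" and hand back the other operand, which is why rs and rs_final start as None and are threaded through each call. A small self-contained sketch of that pattern with made-up rids:

from BTrees.IIBTree import IIBucket, weightedIntersection, weightedUnion

lat_set = IIBucket()
lat_set.update({1: 1, 2: 1, 3: 1})   # rids matching the latitude range (illustrative)
lon_set = IIBucket()
lon_set.update({2: 1, 3: 1, 4: 1})   # rids matching the longitude range (illustrative)

rs = None
w, rs = weightedIntersection(rs, lat_set)   # rs is None, so lat_set comes back unchanged
w, rs = weightedIntersection(rs, lon_set)   # keep only rids present in both ranges
print(list(rs))                             # expected: [2, 3]

rs_final = None
w, rs_final = weightedUnion(rs, rs_final)   # OR per-filter results; the None side contributes nothing
print(list(rs_final))                       # expected: [2, 3]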
Example #9
def getClusters(catalog_tool, filters):
    # objects are searched within the tile limits so that the same clusters are returned every time
    grid_size = 12  # density of geopoints and clusters on the map; also depends on the map frame size

    # unpack map limits
    if filters:
        lat_min = float(filters[0]['geo_latitude']['query'][0])
        lat_max = float(filters[0]['geo_latitude']['query'][1])

        lon_min = float(filters[0]['geo_longitude']['query'][0])
        lon_max = float(filters[0]['geo_longitude']['query'][1])
    else: # this should not happen
        return [], []

    tlat_min, tlat_max, tlon_min, tlon_max = clusters.get_discretized_limits(lat_min, lat_max, lon_min, lon_max, grid_size)

    catalog = catalog_tool._catalog

    # getting the inner indexes for lat and lon
    lat_index = catalog.getIndex('geo_latitude')._index
    lon_index = catalog.getIndex('geo_longitude')._index

    # adjust to cover results outside frame, but very close to margins
    # trying to fix cluster flickering near margins

    # applying the lat and lon indexes to get the rids
    rs = None
    lat_set, lat_dict = _apply_index_with_range_dict_results(lat_index, Decimal(str(tlat_min)), Decimal(str(tlat_max)))
    w, rs = weightedIntersection(rs, lat_set)

    lon_set, lon_dict = _apply_index_with_range_dict_results(lon_index, Decimal(str(tlon_min)), Decimal(str(tlon_max)))
    w, rs = weightedIntersection(rs, lon_set)

    rs_final = None
    # OR the filters and apply the index for each one
    for f in filters:
        rs_f = rs

        # adjust geo limits in filters to be consistent with discretized tile limits
        f['geo_longitude']['query'] = (Decimal(str(tlon_min)), Decimal(str(tlon_max)))
        f['geo_latitude']['query'] = (Decimal(str(tlat_min)), Decimal(str(tlat_max)))

        # this code is adapted from the search function in Zope's catalog implementation
        for i in catalog.indexes.keys():
            index = catalog.getIndex(i)
            _apply_index = getattr(index, "_apply_index", None)
            if _apply_index is None:
                continue
            r = _apply_index(f)

            if r is not None:
                r, u = r
                w, rs_f = weightedIntersection(rs_f, r)

        w, rs_final = weightedUnion(rs_f, rs_final)

    r_list = list(rs_final)

    # transform objects to points
    points = []
    for i in range(len(r_list)):
        points.append(clusters.Point(i, float(lat_dict[r_list[i]]), float(lon_dict[r_list[i]])))

    centers, groups = clusters.kmeans(tlat_min, tlat_max, tlon_min, tlon_max, points, grid_size)

    # transform group points to rids
    for i in range(len(groups)):
        groups[i] = [r_list[p.id] for p in groups[i]]

    return centers, groups