def discrepancy_seeds(goods, bads, all_locs):
    """Find regions with concentration of good points compared with bad ones.

    The bounding box of `all_locs` is cut into a 50 x 50 grid, good and bad
    points are counted per cell, and the `spatial_scan` module is asked for
    the top regions according to its discrepancy function. The regions are
    then merged and every point is assigned to the merged regions that
    contain it.

    Args:
        goods: pair ``(ids, locations)``; `locations` is indexed as a 2-D
            array with x in column 0 and y in column 1.
        bads: pair ``(ids, locations)`` with the same layout as `goods`.
        all_locs: 2-D array with exactly two columns; its per-column
            minimum and maximum define the extent of the grid.

    Returns:
        Tuple ``(hulls, gcluster, bcluster)`` of three parallel lists with
        one entry per merged region: the region's convex hull, the ids of
        the good points it contains and the ids of the bad points it
        contains.

    Side effects:
        Overwrites ``GRID_SIZE``, ``TOP_K`` and ``index_to_rect`` on the
        `spatial_scan` module.
    """
    import spatial_scan as sps
    size = 50
    support = 8
    sps.GRID_SIZE = size
    sps.TOP_K = 500

    # One array of `size + 1` bin edges per coordinate (x, then y).
    xedges, yedges = [
        np.linspace(low, high, size + 1)
        for low, high in zip(np.min(all_locs, 0), np.max(all_locs, 0))
    ]
    bins = (xedges, yedges)
    good_ids, good_loc = goods
    bad_ids, bad_loc = bads

    # histogram2d indexes its result as [x_bin, y_bin]; transposing before
    # flattening makes the flat index equal to `y_bin * size + x_bin`,
    # which is the layout `euc_index_to_rect` decodes below.
    count, _, _ = np.histogram2d(good_loc[:, 0], good_loc[:, 1], bins=bins)
    measured = count.T.ravel()
    count, _, _ = np.histogram2d(bad_loc[:, 0], bad_loc[:, 1], bins=bins)
    background = count.T.ravel()
    total_b = np.sum(background)
    total_m = np.sum(measured)
    discrepancy = sps.get_discrepancy_function(total_m, total_b, support)

    def euc_index_to_rect(idx):
        """Return the bounding box of a grid's cell defined by its `idx`"""
        i = idx % size
        # Floor division: a true-division float cannot index `yedges`.
        j = idx // size
        return [xedges[i], yedges[j], xedges[i + 1], yedges[j + 1]]

    sps.index_to_rect = euc_index_to_rect

    # Floor division keeps the last argument a whole number (50 // 8 == 6).
    # NOTE(review): presumably the maximum region size in cells -- confirm
    # against `spatial_scan.exact_grid`.
    top_loc = sps.exact_grid(np.reshape(measured, (size, size)),
                             np.reshape(background, (size, size)),
                             discrepancy, sps.TOP_K, sps.GRID_SIZE // 8)
    merged = sps.merge_regions(top_loc)

    # Build each point geometry once instead of once per merged region.
    good_points = [sgeo.Point(loc) for loc in good_loc]
    bad_points = [sgeo.Point(loc) for loc in bad_loc]

    gcluster = []
    bcluster = []
    hulls = []
    for region in merged:
        # region[1] must expose `contains` and `convex_hull`
        # (presumably a shapely geometry -- confirm in `merge_regions`).
        shape = region[1]
        gcluster.append([id_ for id_, point in zip(good_ids, good_points)
                         if shape.contains(point)])
        bcluster.append([id_ for id_, point in zip(bad_ids, bad_points)
                         if shape.contains(point)])
        hulls.append(shape.convex_hull)
    return hulls, gcluster, bcluster
def discrepancy_seeds(goods, bads, all_locs):
    """Find regions with concentration of good points compared with bad ones.

    The bounding box of `all_locs` is cut into a 50 x 50 grid, good and bad
    points are counted per cell, and the `spatial_scan` module is asked for
    the top regions according to its discrepancy function. The regions are
    then merged and every point is assigned to the merged regions that
    contain it.

    Args:
        goods: pair ``(ids, locations)``; `locations` is indexed as a 2-D
            array with x in column 0 and y in column 1.
        bads: pair ``(ids, locations)`` with the same layout as `goods`.
        all_locs: 2-D array with exactly two columns; its per-column
            minimum and maximum define the extent of the grid.

    Returns:
        Tuple ``(hulls, gcluster, bcluster)`` of three parallel lists with
        one entry per merged region: the region's convex hull, the ids of
        the good points it contains and the ids of the bad points it
        contains.

    Side effects:
        Overwrites ``GRID_SIZE``, ``TOP_K`` and ``index_to_rect`` on the
        `spatial_scan` module.
    """
    import spatial_scan as sps
    size = 50
    support = 8
    sps.GRID_SIZE = size
    sps.TOP_K = 500

    # One array of `size + 1` bin edges per coordinate (x, then y).
    xedges, yedges = [np.linspace(low, high, size + 1)
                      for low, high in zip(np.min(all_locs, 0),
                                           np.max(all_locs, 0))]
    bins = (xedges, yedges)
    good_ids, good_loc = goods
    bad_ids, bad_loc = bads

    # histogram2d indexes its result as [x_bin, y_bin]; transposing before
    # flattening makes the flat index equal to `y_bin * size + x_bin`,
    # which is the layout `euc_index_to_rect` decodes below.
    count, _, _ = np.histogram2d(good_loc[:, 0], good_loc[:, 1], bins=bins)
    measured = count.T.ravel()
    count, _, _ = np.histogram2d(bad_loc[:, 0], bad_loc[:, 1], bins=bins)
    background = count.T.ravel()
    total_b = np.sum(background)
    total_m = np.sum(measured)
    discrepancy = sps.get_discrepancy_function(total_m, total_b, support)

    def euc_index_to_rect(idx):
        """Return the bounding box of a grid's cell defined by its `idx`"""
        i = idx % size
        # Floor division: a true-division float cannot index `yedges`.
        j = idx // size
        return [xedges[i], yedges[j], xedges[i+1], yedges[j+1]]

    sps.index_to_rect = euc_index_to_rect

    # Floor division keeps the last argument a whole number (50 // 8 == 6).
    # NOTE(review): presumably the maximum region size in cells -- confirm
    # against `spatial_scan.exact_grid`.
    top_loc = sps.exact_grid(np.reshape(measured, (size, size)),
                             np.reshape(background, (size, size)),
                             discrepancy, sps.TOP_K, sps.GRID_SIZE // 8)
    merged = sps.merge_regions(top_loc)

    # Build each point geometry once instead of once per merged region.
    good_points = [sgeo.Point(loc) for loc in good_loc]
    bad_points = [sgeo.Point(loc) for loc in bad_loc]

    gcluster = []
    bcluster = []
    hulls = []
    for region in merged:
        # region[1] must expose `contains` and `convex_hull`
        # (presumably a shapely geometry -- confirm in `merge_regions`).
        shape = region[1]
        gcluster.append([id_ for id_, point in zip(good_ids, good_points)
                         if shape.contains(point)])
        bcluster.append([id_ for id_, point in zip(bad_ids, bad_points)
                         if shape.contains(point)])
        hulls.append(shape.convex_hull)
    return hulls, gcluster, bcluster
def do_scan(client, city, k, photos_as_background=True):
    """Perform discrepancy scan on `city` over a `k` x `k` grid.

    Args:
        client: forwarded unchanged to `load_frequency`.
        city: forwarded unchanged to `load_frequency`.
        k: number of cells along each side of the grid; the loaded
            frequencies are reshaped to ``(k, k)``.
        photos_as_background: forwarded unchanged to `load_frequency`.

    Returns:
        ``None`` when the measured counts do not sum to a positive value,
        otherwise the tuple ``(top_loc, ratio)`` where `top_loc` is the
        result of `sps.exact_grid` and `ratio` is
        ``compute_ratio(background, measured)``.
    """
    background, measured = load_frequency(client, city, k,
                                          photos_as_background)
    total_b = np.sum(background)
    total_m = np.sum(measured)
    # Written as `not ... > 0` so that a NaN total is rejected as well.
    if not total_m > 0:
        return None

    # The support threshold grows with the amount of measured data;
    # total_m is known to be positive at this point.
    if total_m <= 500:
        support = 20
    elif total_m <= 2000:
        support = 40
    else:
        support = sps.MAX_SUPPORT
    discrepancy = sps.get_discrepancy_function(total_m, total_b, support)

    grid_dim = (k, k)
    info = u'g={}, s={}, k={}, w={}, h={}, max={}'
    print(info.format(k, support, sps.TOP_K, sps.MIN_WIDTH, sps.MIN_HEIGHT,
                      sps.MAX_SIZE))
    # Floor division keeps the last argument a whole number under true
    # division. NOTE(review): assumes sps.MAX_SIZE is an integer -- confirm.
    top_loc = sps.exact_grid(np.reshape(measured, grid_dim),
                             np.reshape(background, grid_dim),
                             discrepancy, sps.TOP_K, k // sps.MAX_SIZE)
    return top_loc, compute_ratio(background, measured)
def do_scan(client, city, k, photos_as_background=True):
    """Perform discrepancy scan on `city` over a `k` x `k` grid.

    Args:
        client: forwarded unchanged to `load_frequency`.
        city: forwarded unchanged to `load_frequency`.
        k: number of cells along each side of the grid; the loaded
            frequencies are reshaped to ``(k, k)``.
        photos_as_background: forwarded unchanged to `load_frequency`.

    Returns:
        ``None`` when the measured counts do not sum to a positive value,
        otherwise the tuple ``(top_loc, ratio)`` where `top_loc` is the
        result of `sps.exact_grid` and `ratio` is
        ``compute_ratio(background, measured)``.
    """
    background, measured = load_frequency(client, city, k,
                                          photos_as_background)
    total_b = np.sum(background)
    total_m = np.sum(measured)
    # Written as `not ... > 0` so that a NaN total is rejected as well.
    if not total_m > 0:
        return None

    # The support threshold grows with the amount of measured data;
    # total_m is known to be positive at this point.
    if total_m <= 500:
        support = 20
    elif total_m <= 2000:
        support = 40
    else:
        support = sps.MAX_SUPPORT
    discrepancy = sps.get_discrepancy_function(total_m, total_b, support)

    grid_dim = (k, k)
    info = u'g={}, s={}, k={}, w={}, h={}, max={}'
    print(
        info.format(k, support, sps.TOP_K, sps.MIN_WIDTH, sps.MIN_HEIGHT,
                    sps.MAX_SIZE))
    # Floor division keeps the last argument a whole number under true
    # division. NOTE(review): assumes sps.MAX_SIZE is an integer -- confirm.
    top_loc = sps.exact_grid(np.reshape(measured, grid_dim),
                             np.reshape(background, grid_dim), discrepancy,
                             sps.TOP_K, k // sps.MAX_SIZE)
    return top_loc, compute_ratio(background, measured)