示例#1
0
def join_CCs(X, G, embed_dim, num_ccs=1, max_angle=0.3, verbose=False):
  """Connect the components of G until no more than num_ccs remain.

  Subspaces are fit once, via cluster_subspaces, to the connected components
  G has on entry; those planes and their labels are reused unchanged in every
  round. Each round measures inter-component distances, takes the nearest
  neighbor of every component as a candidate "meta-edge", and asks
  _connect_meta_edges to add edges under the current distance and angle
  thresholds. When nothing is added, a threshold is relaxed and the attempt
  is repeated.

  Parameters:
    X: data forwarded to cluster_subspaces, inter_cluster_distance and
      _connect_meta_edges.
    G: graph exposing connected_components(return_labels=True).
    embed_dim: forwarded to cluster_subspaces.
    num_ccs: the loop stops once the component count is not above this.
    max_angle: ceiling used when raising the angle threshold; it is bumped
      by 0.1 whenever the helper reports an infinite minimum distance.
    verbose: print progress information.

  Returns:
    (CC_labels, angle_thresh): the component labels computed on entry, and
    the angle threshold in effect when the loop finished.

  Raises:
    AssertionError: no edge was added and neither threshold can be raised.

  NOTE(review): G is rebound locally and never returned, so callers
  presumably rely on _connect_meta_edges updating the graph in place -
  confirm against that helper.
  """
  n, labels = G.connected_components(return_labels=True)
  # Planes are fit to the initial components only (assumed local + linear).
  CC_planes, _ = cluster_subspaces(X, embed_dim, n, labels)
  # Labels matching CC_planes; never recomputed below.
  CC_labels = labels
  angle_thresh = 0.1

  while n > num_ccs:
    # Distance between clusters, taken between their two closest members.
    Dcenter, min_edge_idxs = inter_cluster_distance(X, n, labels)
    # k=2 because each cluster's neighbor list includes itself.
    ninds = nearest_neighbors(Dcenter, precomputed=True, k=2)
    src, dst = ninds[:,0], ninds[:,1]
    # Start from the median meta-edge length as the distance threshold.
    dist_thresh = np.median(Dcenter[src, dst])
    if verbose:  # pragma: no cover
      print(n, 'CCs')
    # Translate meta-edges back into the CC_labels space
    # (presumably via the indices of the closest member points).
    CC_ninds = CC_labels[min_edge_idxs[src, dst]]

    # Retry with progressively looser thresholds until an edge is added.
    while True:
      if verbose:  # pragma: no cover
        print('DT:', dist_thresh, 'AT:', angle_thresh)
      G, added, next_dist, next_angle = _connect_meta_edges(
          X, G, CC_planes, CC_labels, CC_ninds,
          dist_thresh=dist_thresh, angle_thresh=angle_thresh)
      if added > 0:
        break

      # Prefer loosening the angle while it stays within max_angle.
      if angle_thresh < next_angle <= max_angle:
        angle_thresh = next_angle
        continue

      # Otherwise the distance threshold must have room to grow.
      if not dist_thresh < next_dist:
        raise AssertionError("Impossible state: can't increase dist_thresh "
                             "enough to make a connection")

      if not np.isinf(next_dist):
        dist_thresh = next_dist
        continue

      # Infinite distance reported: widen the angle ceiling instead.
      max_angle += 0.1  # XXX: hack
      angle_thresh = min(next_angle, max_angle)
      if verbose:  # pragma: no cover
        print('Increasing max_angle to', max_angle)

    # Recount components (the original CC_planes are kept) and repeat.
    n, labels = G.connected_components(return_labels=True)

  return CC_labels, angle_thresh
示例#2
0
def grow_trees(X, G, embed_dim, verbose=False):
  """Merge the smallest components of G until each has > embed_dim members.

  Each round finds the connected components of G and their sizes. If the
  smallest component already has more than embed_dim members, G is returned.
  Otherwise the smallest components are paired with their nearest neighboring
  component and _connect_meta_edges is asked to add edges within a distance
  threshold. The threshold never decreases across rounds.

  Parameters:
    X: data forwarded to inter_cluster_distance and _connect_meta_edges.
    G: graph exposing connected_components(return_labels=True).
    embed_dim: size that every component must exceed.
    verbose: print progress information.

  Returns:
    The graph returned by the last _connect_meta_edges call, or the input G
    when no merging was needed.

  Raises:
    AssertionError: a round added no edges. This is raised explicitly rather
      than via `assert`, because a stripped assert (python -O) would leave
      the loop repeating the same failed attempt forever.
  """
  dist_thresh = 0
  while True:
    n, labels = G.connected_components(return_labels=True)
    tree_sizes = np.bincount(labels)
    min_tree_size = tree_sizes.min()
    if min_tree_size > embed_dim:
      break
    Dcenter, _ = inter_cluster_distance(X, n, labels)
    # k=2 because each cluster's neighbor list includes itself.
    pairs = nearest_neighbors(Dcenter, precomputed=True, k=2)
    # Only the smallest trees get a meta-edge this round.
    ninds = pairs[tree_sizes == min_tree_size]
    meta_edge_lengths = Dcenter[ninds[:,0],ninds[:,1]]
    # Long enough to reach every selected neighbor; never shrinks.
    dist_thresh = max(dist_thresh, np.max(meta_edge_lengths))
    if verbose:  # pragma: no cover
      print(n, 'CCs. dist thresh:', dist_thresh)
    # modify G to connect edges between nearby CCs
    G, num_added = _connect_meta_edges(X, G, None, labels, ninds,
                                       dist_thresh=dist_thresh)[:2]
    if not num_added > 0:
      raise AssertionError("grow_trees made no progress: no edges were "
                           "added at dist_thresh=%r" % (dist_thresh,))
  return G