def eccentricity(data, exponent=1., metricpar={}, callback=None):
    '''
    Eccentricity filter.

    For each point, compute the normalized p-norm (p = ``exponent``) of its
    vector of distances to all other points.

    Parameters:
        data : either a compressed (condensed) dissimilarity matrix as a
            1-dimensional array (e.g. the output of
            ``scipy.spatial.distance.pdist``), or a 2-dimensional array of
            vector data, one point per row.
        exponent : the exponent p; ``np.inf`` (or the strings 'Inf'/'inf')
            yields the maximal distance to any other point.
        metricpar : extra keyword arguments for
            ``scipy.spatial.distance.cdist`` (vector data only).  This dict
            is only read, never mutated, so the mutable default is safe.
        callback : optional progress-reporting callable (vector data only).

    Returns a 1-dimensional array with one eccentricity value per point.
    '''
    if data.ndim == 1:
        # Condensed dissimilarity matrix: expand to square form and work
        # column-wise.  Metric parameters make no sense here.
        assert metricpar == {}, \
            'No optional parameter is allowed for a dissimilarity matrix.'
        ds = squareform(data, force='tomatrix')
        if exponent in (np.inf, 'Inf', 'inf'):
            return ds.max(axis=0)
        elif exponent == 1.:
            # p = 1: plain mean distance; raising to the power 1 would be
            # a pointless copy, so sum directly.
            # (np.alen was removed in NumPy 1.23; len() is equivalent.)
            return ds.sum(axis=0) / float(len(ds))
        else:
            ds = np.power(ds, exponent)
            return np.power(ds.sum(axis=0) / float(len(ds)), 1. / exponent)
    else:
        # Vector data: process one row of the distance matrix at a time so
        # memory usage stays linear in the number of points.
        progress = progressreporter(callback)
        N = len(data)
        ecc = np.empty(N)
        if exponent in (np.inf, 'Inf', 'inf'):
            for i in range(N):
                ecc[i] = cdist(data[(i,), :], data, **metricpar).max()
                progress((i + 1) * 100 // N)
        elif exponent == 1.:
            for i in range(N):
                ecc[i] = cdist(data[(i,), :], data, **metricpar).sum() / float(N)
                progress((i + 1) * 100 // N)
        else:
            for i in range(N):
                dsum = np.power(cdist(data[(i,), :], data, **metricpar),
                                exponent).sum()
                ecc[i] = np.power(dsum / float(N), 1. / exponent)
                progress((i + 1) * 100 // N)
        return ecc
def eccentricity(data, exponent=1., metricpar={}, callback=None):
    '''
    Eccentricity filter.

    For each point, compute the normalized p-norm (p = ``exponent``) of its
    vector of distances to all other points.

    Parameters:
        data : either a compressed (condensed) dissimilarity matrix as a
            1-dimensional array (e.g. the output of
            ``scipy.spatial.distance.pdist``), or a 2-dimensional array of
            vector data, one point per row.
        exponent : the exponent p; ``np.inf`` (or the strings 'Inf'/'inf')
            yields the maximal distance to any other point.
        metricpar : extra keyword arguments for
            ``scipy.spatial.distance.cdist`` (vector data only).  This dict
            is only read, never mutated, so the mutable default is safe.
        callback : optional progress-reporting callable (vector data only).

    Returns a 1-dimensional array with one eccentricity value per point.
    '''
    if data.ndim == 1:
        # Condensed dissimilarity matrix: expand to square form and work
        # column-wise.  Metric parameters make no sense here.
        assert metricpar == {}, \
            'No optional parameter is allowed for a dissimilarity matrix.'
        ds = squareform(data, force='tomatrix')
        if exponent in (np.inf, 'Inf', 'inf'):
            return ds.max(axis=0)
        elif exponent == 1.:
            # p = 1: plain mean distance; raising to the power 1 would be
            # a pointless copy, so sum directly.
            # (np.alen was removed in NumPy 1.23; len() is equivalent.)
            return ds.sum(axis=0) / float(len(ds))
        else:
            ds = np.power(ds, exponent)
            return np.power(ds.sum(axis=0) / float(len(ds)), 1. / exponent)
    else:
        # Vector data: process one row of the distance matrix at a time so
        # memory usage stays linear in the number of points.
        progress = progressreporter(callback)
        N = len(data)
        ecc = np.empty(N)
        if exponent in (np.inf, 'Inf', 'inf'):
            for i in range(N):
                ecc[i] = cdist(data[(i,), :], data, **metricpar).max()
                progress((i + 1) * 100 // N)
        elif exponent == 1.:
            for i in range(N):
                ecc[i] = cdist(data[(i,), :], data, **metricpar).sum() / float(N)
                progress((i + 1) * 100 // N)
        else:
            for i in range(N):
                dsum = np.power(cdist(data[(i,), :], data, **metricpar),
                                exponent).sum()
                ecc[i] = np.power(dsum / float(N), 1. / exponent)
                progress((i + 1) * 100 // N)
        return ecc
def nearest_neighbors_from_dm(X, k, callback=None):
    '''
    Find the k nearest neighbors of every point from a compressed
    dissimilarity matrix.

    This is inefficient. To be done: (1) Do not fully sort every row of
    the distance matrix but find the first k=lo elements. (2) Use the
    compressed distance matrix, not the square form. Both improvements
    are realized in cmappertools 1.0.5.

    Parameters:
        X : compressed (condensed) dissimilarity matrix, as produced by
            ``scipy.spatial.distance.pdist``.
        k : number of neighbors to report per point; note that each point
            is its own nearest neighbor at distance 0.
        callback : optional progress-reporting callable.

    Returns the pair ``(d, j)``: ``d[i]`` are the distances from point i to
    its k nearest neighbors in ascending order, ``j[i]`` the corresponding
    point indices.
    '''
    progress = progressreporter(callback)
    D = squareform(X, force='tomatrix')
    # np.alen was removed in NumPy 1.23; len() gives the same row count.
    N = len(D)
    j = np.empty((N, k), dtype=np.intp)
    d = np.empty((N, k))
    for i, row in enumerate(D):
        # Full sort keeps the neighbors in ascending distance order (see
        # the to-do note above about using a partial sort instead).
        j[i] = np.argsort(row)[:k]
        d[i] = row[j[i]]
        progress((i + 1) * 100 // N)
    return d, j
def nearest_neighbors_from_dm(X, k, callback=None):
    '''
    Find the k nearest neighbors of every point from a compressed
    dissimilarity matrix.

    This is inefficient. To be done: (1) Do not fully sort every row of
    the distance matrix but find the first k=lo elements. (2) Use the
    compressed distance matrix, not the square form. Both improvements
    are realized in cmappertools 1.0.5.

    Parameters:
        X : compressed (condensed) dissimilarity matrix, as produced by
            ``scipy.spatial.distance.pdist``.
        k : number of neighbors to report per point; note that each point
            is its own nearest neighbor at distance 0.
        callback : optional progress-reporting callable.

    Returns the pair ``(d, j)``: ``d[i]`` are the distances from point i to
    its k nearest neighbors in ascending order, ``j[i]`` the corresponding
    point indices.
    '''
    progress = progressreporter(callback)
    D = squareform(X, force='tomatrix')
    # np.alen was removed in NumPy 1.23; len() gives the same row count.
    N = len(D)
    j = np.empty((N, k), dtype=np.intp)
    d = np.empty((N, k))
    for i, row in enumerate(D):
        # Full sort keeps the neighbors in ascending distance order (see
        # the to-do note above about using a partial sort instead).
        j[i] = np.argsort(row)[:k]
        d[i] = row[j[i]]
        progress((i + 1) * 100 // N)
    return d, j
def Gauss_density(data, sigma, metricpar={}, callback=None):
    '''
    Kernel density estimate with a Gaussian kernel of bandwidth ``sigma``.

    Parameters:
        data : either a compressed (condensed) dissimilarity matrix as a
            1-dimensional array, or a 2-dimensional array of vector data,
            one point per row.
        sigma : kernel bandwidth (standard deviation of the Gaussian).
        metricpar : extra keyword arguments for
            ``scipy.spatial.distance.cdist`` (vector data only).  This dict
            is only read, never mutated, so the mutable default is safe.
        callback : optional progress-reporting callable (vector data only).

    Returns a 1-dimensional array of density values, one per point.  For
    vector data the values are normalized as a proper Gaussian KDE; for a
    dissimilarity matrix they are unnormalized kernel sums (see comment
    below).
    '''
    denom = -2. * sigma * sigma
    if data.ndim == 1:
        assert metricpar == {}, ('No optional parameter is allowed for a '
                                 'dissimilarity matrix.')
        ds = squareform(data, force='tomatrix')
        dd = np.exp(ds * ds / denom)
        # no normalization since the dimensionality is not known
        # dd = 1/(N*(sqrt(2*pi)*sigma)^n)*exp(-ds*ds/(2*sigma*sigma)),
        # where N=#samples, n=dimensionality
        dens = dd.sum(axis=0)
    else:
        # Vector data: one distance row at a time to keep memory linear.
        progress = progressreporter(callback)
        # np.alen was removed in NumPy 1.23; len() is equivalent here.
        N = len(data)
        dens = np.empty(N)
        for i in range(N):
            d = cdist(data[(i,), :], data, **metricpar)
            dens[i] = np.exp(d * d / denom).sum()
            progress((i + 1) * 100 // N)
        # Normalize: dimensionality n = data.shape[1] is known here.
        dens /= N * np.power(np.sqrt(2 * np.pi) * sigma, data.shape[1])
    return dens
def Gauss_density(data, sigma, metricpar={}, callback=None):
    '''
    Kernel density estimate with a Gaussian kernel of bandwidth ``sigma``.

    Parameters:
        data : either a compressed (condensed) dissimilarity matrix as a
            1-dimensional array, or a 2-dimensional array of vector data,
            one point per row.
        sigma : kernel bandwidth (standard deviation of the Gaussian).
        metricpar : extra keyword arguments for
            ``scipy.spatial.distance.cdist`` (vector data only).  This dict
            is only read, never mutated, so the mutable default is safe.
        callback : optional progress-reporting callable (vector data only).

    Returns a 1-dimensional array of density values, one per point.  For
    vector data the values are normalized as a proper Gaussian KDE; for a
    dissimilarity matrix they are unnormalized kernel sums (see comment
    below).
    '''
    denom = -2. * sigma * sigma
    if data.ndim == 1:
        assert metricpar == {}, ('No optional parameter is allowed for a '
                                 'dissimilarity matrix.')
        ds = squareform(data, force='tomatrix')
        dd = np.exp(ds * ds / denom)
        # no normalization since the dimensionality is not known
        # dd = 1/(N*(sqrt(2*pi)*sigma)^n)*exp(-ds*ds/(2*sigma*sigma)),
        # where N=#samples, n=dimensionality
        dens = dd.sum(axis=0)
    else:
        # Vector data: one distance row at a time to keep memory linear.
        progress = progressreporter(callback)
        # np.alen was removed in NumPy 1.23; len() is equivalent here.
        N = len(data)
        dens = np.empty(N)
        for i in range(N):
            d = cdist(data[(i,), :], data, **metricpar)
            dens[i] = np.exp(d * d / denom).sum()
            progress((i + 1) * 100 // N)
        # Normalize: dimensionality n = data.shape[1] is known here.
        dens /= N * np.power(np.sqrt(2 * np.pi) * sigma, data.shape[1])
    return dens
def do_scale_graph(M, weighting='inverse', exponent=0., maxcluster=None, expand_intervals=False, verbose=True, callback=None):
    '''
    Compute the scale graph from a Mapper output.

    Parameters:
        M : Mapper output object; must provide ``add_info`` and a
            ``scale_graph_data`` attribute with ``dendrogram``,
            ``diameter`` and ``layerdata`` members (defined elsewhere
            in the project).
        weighting : edge-weighting scheme handed to ``Layered_Dijkstra``.
        exponent : exponent applied to the larger of the two cluster
            lower bounds when weighting an overlap edge.
        maxcluster : if not None, cap on the number of clusters that are
            considered per layer.
        expand_intervals : stored on the scale graph data; consumed
            elsewhere in the project.
        verbose : if True, write progress messages to stdout.
        callback : optional progress-reporting callable.

    Returns nothing; the results are stored on ``M.scale_graph_data``
    (``path``, ``infmin``, ``edges``).
    '''
    # Record the parameters of this run in the Mapper output's info.
    M.add_info(cutoff="Scale graph algorithm ({0}, '{1}', {2})".format(exponent, expand_intervals, maxcluster))
    sgd = M.scale_graph_data
    sgd.maxcluster = maxcluster
    sgd.expand_intervals = expand_intervals
    dendrogram = sgd.dendrogram
    # NOTE(review): 'diameter' is read but not used below — presumably kept
    # for symmetry with layerdata; confirm before removing.
    diameter = sgd.diameter
    layers = len(dendrogram)
    # Add edges
    if verbose:
        sys.stdout.write('Add edges:')
        sys.stdout.flush()
    # Per-layer data: number of clusters, lower bounds, upper bounds and
    # the layer diameter (semantics defined by sgd.layerdata).
    N2, LB2, UB2, diam2 = sgd.layerdata(0)
    Dijkstra = Layered_Dijkstra(weighting=weighting)
    Dijkstra.start(N2)
    progress = progressreporter(callback)
    for i in range(1, layers):
        # Shift the window: layer i-1 becomes "1", layer i becomes "2".
        N1, LB1, UB1, diam1 = N2, LB2, UB2, diam2
        N2, LB2, UB2, diam2 = sgd.layerdata(i)
        Dijkstra.next_layer(N2)
        # Node 0 in each layer is a special node; always connect them.
        Dijkstra.add_edge(0, 0)
        # Clusters whose lower bound exceeds the other layer's diameter
        # connect to that layer's node 0.  LB is assumed to be sorted in
        # descending order, hence takewhile.
        for j in takewhile(lambda j: LB1[j] >= diam2, range(N1)):
            Dijkstra.add_edge(j + 1, 0)
        for j in takewhile(lambda j: LB2[j] >= diam1, range(N2)):
            Dijkstra.add_edge(0, j + 1)
        if N1 and N2:
            # Upper limits on the cluster indices considered (+1 for the
            # special node offset); optionally capped by maxcluster.
            s0 = (N1 if maxcluster is None else min(N1, maxcluster)) + 1
            t0 = (N2 if maxcluster is None else min(N2, maxcluster)) + 1
            # startk skips layer-2 intervals that lie entirely above every
            # remaining layer-1 interval (both lists are traversed in order).
            startk = 1
            for j in range(1, s0):
                # Interval [a, b] for cluster j in layer 1.
                a = LB1[j]
                b = UB1[j - 1]
                for k in range(startk, t0):
                    # Interval [c, d] for cluster k in layer 2.
                    c = LB2[k]
                    d = UB2[k - 1]
                    if c > b:
                        # Interval k lies above interval j; it will also lie
                        # above all following j's, so never revisit it.
                        startk += 1
                        continue
                    if d < a:
                        # Interval k lies below interval j; all further k's
                        # lie even lower — done with this j.
                        break
                    # Overlap length of [a, b] and [c, d]; non-negative by
                    # construction of the two guards above.
                    maxac = max(a, c)
                    overlap = min(b, d) - maxac
                    assert overlap >= 0.
                    if maxac > 0:
                        Dijkstra.add_edge(j, k, overlap, np.power(maxac, exponent))
        progress(i * 100 // (layers - 1))
    if verbose:
        print(' {0} edges in total.'.format(Dijkstra.num_edges()))
    # Extract the optimal path through the layered graph.
    sgd.path, sgd.infmin = Dijkstra.shortest_path()
    if verbose:
        print('Scale graph path:\n{0}'.format(sgd.path))
    sgd.edges = Dijkstra.edges