def _localReadMoreXML(self, xmlNode):
    """
      Method that reads the portion of the xml input that belongs to this specialized class
      and initializes internal parameters.
      The 'metricType' child node selects the metric; every other child node is stored in
      self.distParams (keyed by tag name) and later forwarded to the metric as a keyword.
      @ In, xmlNode, xml.etree.Element, Xml element node
      @ Out, None
    """
    self.distParams = {}
    for child in xmlNode:
        if child.tag == 'metricType':
            self.metricType = child.text
        else:
            self.distParams[str(child.tag)] = utils.tryParse(child.text)
    # keys() returns a view on Python 3 and views do not support "+", so each
    # group of names is turned into a real list before being concatenated
    availableMetrics = (list(pairwise.kernel_metrics().keys())
                        + list(pairwise.distance_metrics().keys())
                        + list(scores.keys()))
    if self.metricType not in availableMetrics:
        metricList = ', '.join(availableMetrics[:-1]) + ', or ' + availableMetrics[-1]
        self.raiseAnError(IOError, 'Metric SKL error: metricType ' + str(self.metricType)
                          + ' is not available. Available metrics are: ' + metricList + '.')
    # Try to promote textual parameters to Python literals (lists become numpy arrays).
    # Only values are replaced, never keys, so updating while iterating is safe.
    for key, value in self.distParams.items():
        try:
            newValue = ast.literal_eval(value)
            if isinstance(newValue, list):
                newValue = np.asarray(newValue)
            self.distParams[key] = newValue
        except Exception:
            # best effort: anything that is not a valid literal (including values that
            # utils.tryParse already converted to a non-string) is kept as it was
            self.distParams[key] = value
def test_nystroem_approximation():
    """Check exactness at full rank and the output shape of low-rank Nystroem maps."""
    # one shared generator drives the data and every seeded transformer below
    rnd = np.random.RandomState(0)
    X = rnd.uniform(size=(10, 4))
    n_samples = X.shape[0]

    def check_two_components(**nystroem_options):
        # fit a rank-2 approximation and make sure one row per sample comes back
        approximator = Nystroem(n_components=2, random_state=rnd,
                                **nystroem_options)
        embedded = approximator.fit(X).transform(X)
        assert_equal(embedded.shape, (X.shape[0], 2))

    # With n_components = n_samples the feature map reproduces the kernel exactly
    full_rank = Nystroem(n_components=n_samples).fit_transform(X)
    K = rbf_kernel(X)
    assert_array_almost_equal(np.dot(full_rank, full_rank.T), K)

    # default kernel
    check_two_components()

    # a user supplied callable is accepted as kernel
    def linear_kernel(X, Y):
        return np.dot(X, Y.T)

    check_two_components(kernel=linear_kernel)

    # every registered kernel name must fit and transform
    for kern in kernel_metrics():
        check_two_components(kernel=kern)
def test_affinities():
    """Exercise SpectralClustering with string, callable and invalid affinities."""
    # Note: in the following, random_state has been selected to have
    # a dataset that yields a stable eigen decomposition both when built
    # on OSX and Linux
    X, y = make_blobs(n_samples=20, random_state=0,
                      centers=[[1, 1], [-1, -1]], cluster_std=0.01)

    def cluster(data, **options):
        # seeded two-cluster model; returns the labels found for `data`
        estimator = SpectralClustering(n_clusters=2, random_state=0, **options)
        return estimator.fit(data).labels_

    # nearest neighbors affinity: must warn about the disconnected graph
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always", UserWarning)
        knn_labels = cluster(X, affinity='nearest_neighbors')
        assert_equal(adjusted_rand_score(y, knn_labels), 1)
    first_warning = str(caught[0].message)
    assert_true(re.search(r'\bnot fully connected\b', first_warning))

    # default affinity with an explicit gamma
    assert_equal(adjusted_rand_score(y, cluster(X, gamma=2)), 1)

    X = check_random_state(10).rand(10, 5) * 10

    for kern in kernel_metrics():
        # Additive chi^2 gives a negative similarity matrix which
        # doesn't make sense for spectral clustering
        if kern == 'additive_chi2':
            continue
        assert_equal((X.shape[0], ), cluster(X, affinity=kern).shape)

    # constant callable affinity
    constant_labels = cluster(X, affinity=lambda x, y: 1)
    assert_equal((X.shape[0], ), constant_labels.shape)

    def histogram(x, y, **kwargs):
        """Histogram kernel implemented as a callable."""
        assert_equal(kwargs, {})  # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    histogram_labels = cluster(X, affinity=histogram)
    assert_equal((X.shape[0], ), histogram_labels.shape)

    # raise error on unknown affinity
    unknown = SpectralClustering(n_clusters=2, affinity='<unknown>')
    assert_raises(ValueError, unknown.fit, X)
def distance(self, x, y=None, **kwargs):
    """
      This method returns the distance between two points x and y. If y is not provided then x is a pointSet and a distance matrix is returned
      @ In, x, numpy.ndarray, array containing data of x
      @ In, y, numpy.ndarray, optional, array containing data of y
      @ In, kwargs, dict, additional keywords merged with self.distParams and forwarded to the metric
      @ Out, value, numpy.ndarray, metric evaluated between x and y (if y is provided) or among the rows of x if y is None
    """
    if y is not None:
        if isinstance(x, np.ndarray) and isinstance(y, np.ndarray):
            dictTemp = utils.mergeDictionaries(kwargs, self.distParams)
            if self.metricType in pairwise.kernel_metrics().keys():
                # pairwise_kernels is the evaluator; kernel_metrics() only lists the
                # available kernels and accepts no arguments
                value = pairwise.pairwise_kernels(X=x, Y=y, metric=self.metricType, **dictTemp)
            elif self.metricType in pairwise.distance_metrics():
                value = pairwise.pairwise_distances(X=x, Y=y, metric=self.metricType, **dictTemp)
            else:
                # fail with a readable message instead of an unbound local variable
                self.raiseAnError(IOError, 'Metric SKL error: metricType ' + str(self.metricType)
                                  + ' is neither a pairwise kernel nor a pairwise distance.')
            return value
        else:
            self.raiseAnError(IOError, 'Metric SKL error: SKL metrics support only PointSets and not HistorySets')
    else:
        if self.metricType == 'mahalanobis':
            # the inverse covariance of the samples is handed to the metric through the 'VI' keyword
            covMAtrix = np.cov(x.T)
            kwargs['VI'] = np.linalg.inv(covMAtrix)
        dictTemp = utils.mergeDictionaries(kwargs, self.distParams)
        if self.metricType in pairwise.kernel_metrics().keys():
            value = pairwise.pairwise_kernels(X=x, metric=self.metricType, **dictTemp)
        elif self.metricType in pairwise.distance_metrics().keys():
            value = pairwise.pairwise_distances(X=x, metric=self.metricType, **dictTemp)
        else:
            # fail with a readable message instead of an unbound local variable
            self.raiseAnError(IOError, 'Metric SKL error: metricType ' + str(self.metricType)
                              + ' is neither a pairwise kernel nor a pairwise distance.')
        return value
def test_affinities():
    """Exercise SpectralClustering with string, callable and invalid affinities."""
    # Note: in the following, random_state has been selected to have
    # a dataset that yields a stable eigen decomposition both when built
    # on OSX and Linux
    X, y = make_blobs(n_samples=20, random_state=0,
                      centers=[[1, 1], [-1, -1]], cluster_std=0.01)

    def seeded_model(**options):
        # two clusters, fixed seed; everything else is supplied by the caller
        return SpectralClustering(n_clusters=2, random_state=0, **options)

    # nearest neighbors affinity
    knn_model = seeded_model(affinity="nearest_neighbors")
    with pytest.warns(UserWarning, match="not fully connected"):
        knn_model.fit(X)
    assert adjusted_rand_score(y, knn_model.labels_) == 1

    # default affinity with an explicit gamma
    gamma_labels = seeded_model(gamma=2).fit(X).labels_
    assert adjusted_rand_score(y, gamma_labels) == 1

    X = check_random_state(10).rand(10, 5) * 10

    for kern in kernel_metrics():
        # Additive chi^2 gives a negative similarity matrix which
        # doesn't make sense for spectral clustering
        if kern == "additive_chi2":
            continue
        kernel_labels = seeded_model(affinity=kern).fit(X).labels_
        assert (X.shape[0], ) == kernel_labels.shape

    # constant callable affinity
    constant_labels = seeded_model(affinity=lambda x, y: 1).fit(X).labels_
    assert (X.shape[0], ) == constant_labels.shape

    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert kwargs == {}  # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    histogram_labels = seeded_model(affinity=histogram).fit(X).labels_
    assert (X.shape[0], ) == histogram_labels.shape

    # raise error on unknown affinity
    unknown = SpectralClustering(n_clusters=2, affinity="<unknown>")
    with pytest.raises(ValueError):
        unknown.fit(X)
def edge_weight(x, y, mode='rbf', gamma=0.5):
    """Evaluate the metric named ``mode`` between ``x`` and ``y``.

    ``mode`` is looked up first among the names returned by
    ``distance_metrics()`` and then among the names returned by
    ``kernel_metrics()`` extended with 'bhattacharya' and 'intersection'.
    A name present in both groups therefore resolves to the distance.

    Parameters
    ----------
    x, y
        Operands handed unchanged to the selected function.
    mode : str
        Name of the distance or kernel to evaluate.
    gamma : float
        Passed as the ``gamma`` keyword to kernels only; distances are called
        without it.

    Returns
    -------
    numpy.float64
        The result of the selected function converted with ``np.float64``.

    Raises
    ------
    ValueError
        If ``mode`` is neither a known distance nor a known kernel.
    """
    dists = distance_metrics()
    # Work on a copy: scikit-learn's kernel_metrics() hands back its own registry
    # dict, so adding the custom kernels in place would modify library-global state.
    kernels = dict(kernel_metrics())
    kernels['bhattacharya'] = bhattacharya
    kernels['intersection'] = intersection
    if mode in dists:
        diff = dists[mode](x, y)
    elif mode in kernels:
        # NOTE(review): gamma is forwarded to every kernel, including ones that may
        # not declare such a parameter - confirm against the supported modes.
        diff = kernels[mode](x, y, gamma=gamma)
    else:
        # ValueError is still an Exception, so existing broad handlers keep working
        raise ValueError('Mode not recognised: %r' % (mode,))
    return np.float64(diff)
def distance(self, x, y=None, **kwargs):
    """
      Evaluate the configured metric. With both x and y the metric is computed between the
      two inputs; with x only it is computed among the rows of x (x is a pointSet).
      @ In, x, numpy.ndarray, array containing data of x, if 1D array is provided and y is given, the array will be reshaped via x.reshape(1,-1)
      @ In, y, numpy.ndarray, array containing data of y, if 1D array is provided, the array will be reshaped via y.reshape(1,-1)
      @ In, kwargs, dict, additional keywords merged with self.distParams and forwarded to the metric
      @ Out, value, numpy.ndarray, metric between x and y (if y is provided) or a square matrix if y is None; a (1,1) result is returned as its first row
    """
    if y is None:
        # point-set mode: x against itself
        if self.metricType == 'mahalanobis':
            # the inverse covariance of the samples is passed through the 'VI' keyword
            kwargs['VI'] = np.linalg.inv(np.cov(x.T))
        options = utils.mergeDictionaries(kwargs, self.distParams)
        if self.metricType in pairwise.kernel_metrics():
            value = pairwise.pairwise_kernels(X=x, metric=self.metricType, **options)
        elif self.metricType in pairwise.distance_metrics():
            value = pairwise.pairwise_distances(X=x, metric=self.metricType, **options)
        return value[0] if value.shape == (1, 1) else value

    # two-input mode: only numpy arrays are accepted
    if not (isinstance(x, np.ndarray) and isinstance(y, np.ndarray)):
        self.raiseAnError(IOError,'Metric SKL error: SKL metrics support only PointSets and not HistorySets')
        return None

    # a single point given as a 1D array becomes a one-row matrix
    if x.ndim == 1:
        x = x.reshape(1, -1)
    if y.ndim == 1:
        y = y.reshape(1, -1)
    options = utils.mergeDictionaries(kwargs, self.distParams)
    if self.metricType in pairwise.kernel_metrics():
        value = pairwise.pairwise_kernels(X=x, Y=y, metric=self.metricType, **options)
    elif self.metricType in pairwise.distance_metrics():
        value = pairwise.pairwise_distances(X=x, Y=y, metric=self.metricType, **options)
    return value[0] if value.shape == (1, 1) else value
def __init__(self, kernel, kernel_params=None, n_jobs=1):
    """Set up the kernel callable and reset the cached state.

    Parameters
    ----------
    kernel : str
        Kernel name. 'mallow' selects ``mallow_kernel_wrapper(n_jobs)``; any
        other value is used as a key into ``kernel_metrics()`` (an unknown
        name raises ``KeyError``).
    kernel_params : dict, optional
        Stored as ``kernel_params_``. When omitted, each instance gets its
        own empty dict.
    n_jobs : int
        Stored as ``n_jobs`` and handed to the 'mallow' wrapper.
    """
    self.kernel = kernel
    self.n_jobs = n_jobs
    if self.kernel == 'mallow':
        self.kernel_ = mallow_kernel_wrapper(self.n_jobs)
    else:
        self.kernel_ = kernel_metrics()[kernel]
    # A literal {} default would be one dict shared by every instance created
    # without kernel_params; build a fresh one per instance instead. A dict
    # supplied by the caller is stored as-is (not copied), as before.
    self.kernel_params_ = {} if kernel_params is None else kernel_params
    self._source_data = None
    self._target_data = None
    self._data = {}
    self.center = False
    self._empty_kernel_values()
def test_affinities():
    """Exercise SpectralClustering with string, callable and invalid affinities."""
    # Note: in the following, random_state has been selected to have
    # a dataset that yields a stable eigen decomposition both when built
    # on OSX and Linux
    X, y = make_blobs(n_samples=20, random_state=0,
                      centers=[[1, 1], [-1, -1]], cluster_std=0.01)

    def build(**options):
        # two clusters, fixed seed; everything else is supplied by the caller
        return SpectralClustering(n_clusters=2, random_state=0, **options)

    def assert_one_label_per_sample(found):
        assert_equal((X.shape[0],), found.shape)

    # nearest neighbors affinity
    knn_model = build(affinity='nearest_neighbors')
    assert_warns_message(UserWarning, 'not fully connected', knn_model.fit, X)
    assert_equal(adjusted_rand_score(y, knn_model.labels_), 1)

    # default affinity with an explicit gamma
    gamma_labels = build(gamma=2).fit(X).labels_
    assert_equal(adjusted_rand_score(y, gamma_labels), 1)

    X = check_random_state(10).rand(10, 5) * 10

    for kern in kernel_metrics():
        # Additive chi^2 gives a negative similarity matrix which
        # doesn't make sense for spectral clustering
        if kern == 'additive_chi2':
            continue
        assert_one_label_per_sample(build(affinity=kern).fit(X).labels_)

    # constant callable affinity
    assert_one_label_per_sample(build(affinity=lambda x, y: 1).fit(X).labels_)

    def histogram(x, y, **kwargs):
        """Histogram kernel implemented as a callable."""
        assert_equal(kwargs, {})  # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    assert_one_label_per_sample(build(affinity=histogram).fit(X).labels_)

    # raise error on unknown affinity
    unknown = SpectralClustering(n_clusters=2, affinity='<unknown>')
    assert_raises(ValueError, unknown.fit, X)
def krx(X, target=None, target2=None, metric="rbf", K_b_inv=None):
    r'''Computes Kernelized RX anomaly detector scores.

    Usage:

        y = krx(X [, target=t] [, target2=t2] [, metric="rbf"] [, K_b_inv=K])

    This is a convenience wrapper: it checks that `metric` is one of the
    names returned by `kernel_metrics()`, builds a `KRX` object from the
    keyword arguments and applies that object to `X`.

    Background (inherited from the non-kernelized `rx` documentation): the
    RX anomaly detector produces a detection statistic equal to the squared
    Mahalanobis distance of a spectrum from a background distribution
    according to

    .. math::

        y=(x-\mu_b)^T\Sigma^{-1}(x-\mu_b)

    where `x` is the pixel spectrum, :math:`\mu_b` is the background
    mean, and :math:`\Sigma` is the background covariance.

    Arguments:

        `X` (numpy.ndarray):

            Data to score; passed unchanged to the `KRX` object.
            NOTE(review): the `rx` documentation this text was copied from
            describes an image of shape (R, C, B) or an array of shape
            (R * C, B) - confirm that `KRX` accepts the same shapes.

        `target`, `target2`, `K_b_inv`:

            Forwarded unchanged to `KRX`; see that class for their meaning.

        `metric` (str):

            Name of the kernel. Must be a key of `kernel_metrics()`.

    Returns:

        Whatever the `KRX` object returns when called with `X`.

    Raises:

        ValueError: if `metric` is not a supported kernel name.

    References:

    Reed, I.S. and Yu, X., "Adaptive multiple-band CFAR detection of an optical
    pattern with unknown spectral distribution," IEEE Trans. Acoust.,
    Speech, Signal Processing, vol. 38, pp. 1760-1770, Oct. 1990.
    '''
    #TODO SOS: update this block
    if metric not in kernel_metrics():
        # "%s" is required here: a bare "%" followed by a backtick is not a valid
        # conversion and made the formatting itself fail
        raise ValueError('`%s` is not a supported metric.' % metric)
    return KRX(target=target, target2=target2, metric=metric, K_b_inv=K_b_inv)(X)
# Registries mapping a metric name to the function implementing it.
# _METRICS_SCALAR_PAIRWISE starts empty and is not filled in this part of the file.
_METRICS_SCALAR_PAIRWISE = {}
# _METRICS_MISC_PAIRWISE is populated below from scikit-learn's own registries.
# The order of the update() calls matters: a later update replaces the entry of
# an earlier one that uses the same name.
_METRICS_MISC_PAIRWISE = {}

# Update with dict of kernel names and functions.
# >>> kernel_metrics()
# {'additive_chi2': sklearn.metrics.pairwise.additive_chi2_kernel,
#  'chi2': sklearn.metrics.pairwise.chi2_kernel,
#  'linear': sklearn.metrics.pairwise.linear_kernel,
#  'polynomial': sklearn.metrics.pairwise.polynomial_kernel,
#  'poly': sklearn.metrics.pairwise.polynomial_kernel,
#  'rbf': sklearn.metrics.pairwise.rbf_kernel,
#  'laplacian': sklearn.metrics.pairwise.laplacian_kernel,
#  'sigmoid': sklearn.metrics.pairwise.sigmoid_kernel,
#  'cosine': sklearn.metrics.pairwise.cosine_similarity}
# (Last Updated: sklearn.__version__ == 0.19.1)
_METRICS_MISC_PAIRWISE.update(sk_pairwise.kernel_metrics())

# Update with dict of distance names and functions.
# Going by the two listings, 'cosine' is present in both registries, so after
# this call the 'cosine' entry refers to the distance, not to the similarity.
# >>> distance_metrics()
# {'cityblock': sklearn.metrics.pairwise.manhattan_distances,  # \/
#  'cosine': sklearn.metrics.pairwise.cosine_distances,
#  'euclidean': sklearn.metrics.pairwise.euclidean_distances,  # \/
#  'l2': sklearn.metrics.pairwise.euclidean_distances,  # /\
#  'l1': sklearn.metrics.pairwise.manhattan_distances,  # \/
#  'manhattan': sklearn.metrics.pairwise.manhattan_distances,  # /\
#  'precomputed': None}
# (Last Updated: sklearn.__version__ == 0.19.1)
_METRICS_MISC_PAIRWISE.update(sk_pairwise.distance_metrics())

# Update with paired distance names (prepend "paired_") and functions.
# >>> {'paired_' + k: v for k, v in
# ...  iteritems(sk_pairwise.PAIRED_DISTANCES.copy())}
# {'paired_cosine': sklearn.metrics.pairwise.paired_cosine_distances,
def distance(self, x, y=None, **kwargs):
    """
      Evaluate the configured metric. With both x and y the metric is computed between the
      two inputs; with x only it is computed among the rows of x (x is a pointSet).
      @ In, x, numpy.ndarray, array containing data of x, if 1D array is provided, y is given and the metric is not one of the scores, the array will be reshaped via x.reshape(1,-1)
      @ In, y, numpy.ndarray, array containing data of y, if 1D array is provided and the metric is not one of the scores, the array will be reshaped via y.reshape(1,-1)
      @ In, kwargs, dict, additional keywords merged with self.distParams and forwarded to the metric
      @ Out, value, numpy.ndarray, metric between x and y (if y is provided) or a square matrix if y is None; a (1,1) result is returned as its first row
    """
    def reportKeywordProblem(error):
        """
          Emit the warning/error pair used when the metric call fails with a TypeError
          @ In, error, TypeError, exception raised by the metric call
          @ Out, None
        """
        self.raiseAWarning('There are some unexpected keyword arguments found in Metric with type "', self.metricType, '"!')
        self.raiseAnError(TypeError, 'Input parameters error:\n', str(error), '\n')

    if y is None:
        # point-set mode: x against itself
        if self.metricType == 'mahalanobis':
            # the inverse covariance of the samples is passed through the 'VI' keyword
            kwargs['VI'] = np.linalg.inv(np.cov(x.T))
        options = utils.mergeDictionaries(kwargs, self.distParams)
        try:
            if self.metricType in pairwise.kernel_metrics():
                value = pairwise.pairwise_kernels(X=x, metric=self.metricType, **options)
            elif self.metricType in pairwise.distance_metrics():
                value = pairwise.pairwise_distances(X=x, metric=self.metricType, **options)
        except TypeError as error:
            reportKeywordProblem(error)
        return value[0] if value.shape == (1, 1) else value

    # two-input mode: only numpy arrays are accepted
    if not (isinstance(x, np.ndarray) and isinstance(y, np.ndarray)):
        self.raiseAnError(IOError, 'Metric SKL error: SKL metrics support only PointSets and not HistorySets')
        return None

    # 1D inputs become one-row matrices, except for the score metrics which
    # receive the arrays exactly as given
    if x.ndim == 1 and self.metricType not in scores:
        x = x.reshape(1, -1)
    if y.ndim == 1 and self.metricType not in scores:
        y = y.reshape(1, -1)
    options = utils.mergeDictionaries(kwargs, self.distParams)
    try:
        if self.metricType in pairwise.kernel_metrics():
            value = pairwise.pairwise_kernels(X=x, Y=y, metric=self.metricType, **options)
        elif self.metricType in pairwise.distance_metrics():
            value = pairwise.pairwise_distances(X=x, Y=y, metric=self.metricType, **options)
        elif self.metricType in scores:
            # the score is stored in a (1,1) array so that it follows the same return path
            value = np.zeros((1, 1))
            value[:, :] = scores[self.metricType](x, y, **options)
    except TypeError as error:
        reportKeywordProblem(error)
    return value[0] if value.shape == (1, 1) else value
def spectral_clustering(input: dict, output: dict, params: dict,
                        log: list) -> None:
    """ Perform spectral clustering on the input connectivity matrix.

    The labels are written with ``np.save`` to the 'labels' output file. An
    empty array is written instead when the connectivity file is empty or
    when the eigen decomposition fails with a ``LinAlgError``.

    Parameters
    ----------
    input : dict
        Input files, allowed: {connectivity}
    output : dict
        Output file, allowed {labels}
    params : dict
        The dict is equivalent to cluster_options in the CBPtools
        documentation on readthedocs.io under the parameters for
        'clustering'.
    log : list
        Logging files; only the first entry is used as the log file.

    Raises
    ------
    ValueError
        If params['kernel'] is not a known kernel (affinity).
    """

    # Input, output, params
    connectivity_file = input.get('connectivity')
    labels_file = output.get('labels')
    log_file = log[0]
    n_init = params.get('n_init')
    kernel = params.get('kernel')
    assign_labels = params.get('assign_labels')
    eigen_solver = params.get('eigen_solver')
    n_clusters = params.get('n_clusters')
    gamma = params.get('gamma', None)
    n_neighbors = params.get('n_neighbors', None)
    degree = params.get('degree', None)
    coef0 = params.get('coef0', None)
    eigen_tol = params.get('eigen_tol', None)

    # Set up logging
    logger = get_logger('spectral_clustering', log_file)

    _, ext = os.path.splitext(connectivity_file)
    connectivity = np.load(connectivity_file)

    # .npz archives hold the matrix under the 'connectivity' key
    if ext == '.npz':
        connectivity = connectivity.get('connectivity')

    # If the connectivity file is empty (connectivity could not be computed),
    # create an empty labels file
    if connectivity.size == 0:
        logger.warning('%s is empty, aborting clustering' % connectivity_file)
        np.save(labels_file, np.array([]))
        return

    # eigen_tol may arrive as text
    if isinstance(eigen_tol, str):
        eigen_tol = float(eigen_tol)

    kernels = list(kernel_metrics().keys())
    kernels.extend(['nearest_neighbors', 'precomputed',
                    'precomputed_nearest_neighbors'])

    if kernel not in kernels:
        msg = 'Unknown kernel (affinity): %s' % kernel
        logger.error(msg)
        raise ValueError(msg)

    gamma_kernels = ('rbf', 'polynomial', 'sigmoid', 'laplacian', 'chi2')
    if gamma is None and kernel in gamma_kernels:
        msg = 'Setting gamma to 1./%s (1./n_features)' % connectivity.shape[1]
        logger.warning(msg)
        gamma = 1./connectivity.shape[1]

    # Only options that were actually set are handed to SpectralClustering,
    # so that its own defaults apply to the rest
    kwargs = {'n_clusters': n_clusters, 'n_init': n_init, 'affinity': kernel,
              'assign_labels': assign_labels, 'eigen_solver': eigen_solver,
              'gamma': gamma, 'n_neighbors': n_neighbors, 'degree': degree,
              'coef0': coef0}
    kwargs = {k: v for k, v in kwargs.items() if v is not None}
    debug_msg = str(['%s=%s' % (k, v) for k, v in kwargs.items()])
    debug_msg = debug_msg.strip('[]').replace('\'', '')
    logger.debug('clustering %s with options: %s'
                 % (connectivity_file, debug_msg))

    # Perform spectral clustering on the available tolerances
    try:
        # eigen_tol is added after the debug message (which therefore does not
        # list it) and, like every other option, only when it has a value:
        # a None must not override the estimator's default tolerance
        if eigen_tol is not None:
            kwargs['eigen_tol'] = eigen_tol
        clustering = SpectralClustering(**kwargs)
        clustering.fit(connectivity)
        labels = clustering.labels_

        if np.unique(labels).size != n_clusters:
            # report through the logger configured for this rule rather than
            # through the root logger
            logger.error('%s: %s clusters requested, only %s found'
                         % (labels_file, n_clusters, np.unique(labels).size))
            # NOTE(review): this empty array is overwritten by the save just
            # below, so the labels are stored even when the cluster count is
            # wrong. A `return` may be missing here - confirm the intent.
            np.save(labels_file, np.array([]))

        # cluster labels are 0-indexed
        np.save(labels_file, labels)

    except np.linalg.LinAlgError as exc:
        logger.error('%s: %s (try increasing the eigen_tol with arpack '
                     'as eigen_solver)' % (labels_file, exc))
        np.save(labels_file, np.array([]))