def initialize(self, X0=None, title='', **kwargs):
    """\
    Choose the starting embedding and keep a snapshot of it.

    Parameters:

    X0 : numpy array or None
    Starting embedding. A given array must have shape (self.N, self.dim).
    When None, an embedding is requested from misc.initial_embedding()
    using radius=self.D_rms.

    title : str
    Text inserted into the header line printed when self.verbose > 0.

    **kwargs
    Forwarded to misc.initial_embedding(); only used when X0 is None.

    Side effects:
    Stores the embedding in self.X, calls self.update(), and then saves a
    copy of self.X in self.X0.
    """
    if self.verbose > 0:
        print('- MDS.initialize('+title+'):')

    # Resolve the embedding first; the matching status line is printed
    # afterwards so that it only appears once the embedding is accepted.
    if X0 is not None:
        assert isinstance(X0, np.ndarray)
        assert X0.shape == (self.N, self.dim)
        origin = ' method : initialization given'
    else:
        X0 = misc.initial_embedding(
            self.N, dim=self.dim, radius=self.D_rms, **kwargs)
        origin = ' method : random'
    if self.verbose > 0:
        print(origin)

    self.X = X0
    self.update()
    # Snapshot taken after update() so X0 reflects the state update() left.
    self.X0 = self.X.copy()

    if self.verbose > 0:
        print(f' initial stress : {self.cost:0.2e}[{self.ncost:0.2e}]')
def initialize(self, X0=None, **kwargs):
    """\
    Choose the starting embedding and keep a snapshot of it.

    Parameters:

    X0 : numpy array or None
    Starting embedding. A given array must have shape (self.N, self.dim).
    When None, an embedding is requested from misc.initial_embedding()
    using radius=1.

    **kwargs
    Forwarded to misc.initial_embedding(); only used when X0 is None.

    Side effects:
    Calls self.update(X0) and then saves a copy of self.embedding in
    self.embedding0.
    """
    if self.verbose > 0:
        print(self.indent + ' TSNE.initialize():')

    # Resolve the embedding first; the matching status line is printed
    # afterwards so that it only appears once the embedding is accepted.
    if X0 is not None:
        assert isinstance(X0, np.ndarray)
        assert X0.shape == (self.N, self.dim)
        origin = ' method : initialization given'
    else:
        X0 = misc.initial_embedding(
            self.N, dim=self.dim, radius=1, **kwargs)
        origin = ' method : random'
    if self.verbose > 0:
        print(self.indent + origin)

    self.update(X0)
    # Snapshot whatever embedding update() stored.
    self.embedding0 = self.embedding.copy()

    if self.verbose > 0:
        print(self.indent + f' initial cost : {self.cost:0.2e}')
def initialize_X(self, X0=None, method='random', max_iters=50, **kwargs):
    """\
    Set the initial embedding X and keep a snapshot of it.

    Parameters:

    X0 : numpy array or None
    Initial embedding (optional). A given array must have shape
    (self.N, self.persp.dimX); `method` is then ignored.

    method : str
    How to produce the embedding when X0 is None:
    'random' : request one from misc.initial_embedding() with radius=1.
    'mds' : run mds.MDS on the average of self.D over its first axis and
    use the resulting embedding.

    max_iters : int
    Passed to the MDS optimizer; only used when method == 'mds'.

    **kwargs
    Forwarded to the MDS optimizer; only used when method == 'mds'.

    Raises:
    ValueError if X0 is None and `method` is not one of the options above.

    Side effects:
    Stores the embedding in self.X, calls self.update(), and then saves a
    copy of self.X in self.X0.
    """
    if self.verbose > 0:
        print('- Multiview.initialize_X():')

    if X0 is not None:
        if self.verbose > 0:
            print(' method : X0 given')
        assert isinstance(X0, np.ndarray)
        assert X0.shape == (self.N, self.persp.dimX)
        self.X = X0
    else:
        if self.verbose > 0:
            print(' method : ', method)
        if method == 'random':
            # **kwargs are intentionally not forwarded on this path.
            self.X = misc.initial_embedding(
                self.N, dim=self.persp.dimX, radius=1)
        elif method == 'mds':
            # Seed with an MDS solution of the perspective-averaged data.
            D = np.average(self.D, axis=0)
            vis = mds.MDS(D, dim=self.persp.dimX)
            vis.initialize()
            vis.optimize(max_iters=max_iters, **kwargs)
            self.X = vis.X
        else:
            # Without this guard an unknown method would leave self.X
            # untouched (stale or missing) and fail later, far from the
            # actual mistake.
            raise ValueError(
                f"unknown initialization method {method!r}; "
                "expected 'random' or 'mds'")

    self.update()
    self.X0 = self.X.copy()
def __init__(self, data, weights=None, data_args=None,
             fixed_embedding=None, fixed_projections=None,
             initial_embedding=None, initial_projections=None,
             visualization_method='mds', visualization_args=None,
             total_cost_function='rms',
             embedding_dimension=3, image_dimension=2,
             projection_family='linear',
             projection_constraint='orthogonal',
             hidden_samples=None,
             sample_labels=None, perspective_labels=None,
             sample_colors=None, image_colors=None,
             verbose=0, indent='', **kwargs):
    """\
    Initializes MPSE object.

    Parameters
    ----------

    data : list, length (n_perspectives)
        Distance/dissimilarity/feature data for each perspective. Passed,
        together with data_args, to
        setup.setup_distances_from_multiple_perspectives(). According to
        the original documentation each entry can be:
        1) A 1D condensed distance array
        2) A square distance matrix
        3) An array containing features
        4) A dictionary describing a graph (flagged as pending upstream)

    weights : None or string or callable or array or list
        Weights used in the cost/gradient of each perspective. A list or
        numpy array must have one entry per perspective; any other value
        is reused for every perspective. Each entry is resolved with
        setup.setup_weights(distances, entry, min_weight=0). The caller's
        list/array is copied, never modified.

    data_args : dictionary or list (optional)
        Passed unchanged to
        setup.setup_distances_from_multiple_perspectives().

    fixed_embedding : array
        If given, used as both the embedding and the initial embedding,
        and self.fixed_embedding is set to True.

    fixed_projections : list or str
        If given, used as both the projections and the initial
        projections, and self.fixed_projections is set to True. A string
        is passed as `method` to self.proj.generate(). Every projection is
        cropped to (image dimension, embedding dimension).

    initial_embedding : array
        Initial embedding, of shape (n_samples, embedding_dimension).
        Ignored when fixed_embedding is given. If None, one is requested
        from misc.initial_embedding() with radius=1.

    initial_projections : list or str
        Initial projections. Ignored when fixed_projections is given. A
        string is passed as `method` to self.proj.generate(). If None,
        self.proj.generate() is called with **kwargs.

    visualization_method : str or list of str
        'mds' or 'tsne'; a list selects a method per perspective.

    visualization_args : dict or list of dict (optional)
        Extra keyword arguments for each visualization instance; a list
        provides a dictionary per perspective. None means no extras.

    total_cost_function : 'rms' or callable
        How individual costs are combined into one cost. 'rms' uses
        sqrt(sum(costs**2)/n_perspectives); a callable receives the array
        of individual costs.

    embedding_dimension : int
        Dimension of embedding.

    image_dimension : int
        Dimension of image (after projection).
        NOTE(review): the original documentation says a list (one value
        per perspective) is allowed; the value is forwarded as-is to
        projections.PROJ and to every visualization instance - confirm
        those accept a list.

    projection_family : str
        Projection family; the 'linear' family gets the gradient defined
        here, any other value gets the generic gradient_X.

    projection_constraint : str
        Constraint on the projection family, passed to projections.PROJ.

    hidden_samples : list (optional)
        One entry per perspective; stored as self.hidden_samples.

    sample_labels : list (optional)
        Labels of samples, one per sample.

    perspective_labels : list (optional)
        Labels of perspectives; defaults to 1..n_perspectives.

    sample_colors : array (optional)
        Stored as self.sample_colors.

    image_colors : array-like (optional)
        Stored as self.image_colors.

    verbose : int
        Progress is printed when verbose > 0.

    indent : str
        Prefix added to printed lines.

    **kwargs
        Forwarded to self.proj.generate() (when random initial
        projections are generated) and to self.update().
    """
    self.verbose, self.indent = verbose, indent
    if verbose > 0:
        print(indent+'mview.MPSE():')

    ##set up sets of distances from data
    self.distances = setup.setup_distances_from_multiple_perspectives(
        data, data_args)
    self.n_perspectives = len(self.distances)
    self.n_samples = scipy.spatial.distance.num_obs_y(self.distances[0])

    ##set up weights from data
    if isinstance(weights, (list, np.ndarray)):
        assert len(weights) == self.n_perspectives
        # Copy so that resolving the weights never overwrites the
        # caller's own list/array.
        weights_per_perspective = list(weights)
    else:
        weights_per_perspective = [weights]*self.n_perspectives
    self.weights = [
        setup.setup_weights(self.distances[i], w, min_weight=0)
        for i, w in enumerate(weights_per_perspective)]

    ##set up parameters
    self.embedding_dimension = embedding_dimension
    self.image_dimension = image_dimension
    self.projection_family = projection_family
    self.projection_constraint = projection_constraint
    proj = projections.PROJ(embedding_dimension, image_dimension,
                            projection_family, projection_constraint)
    self.proj = proj

    ##set up hidden samples
    if hidden_samples is not None:
        assert isinstance(hidden_samples, list)
        assert len(hidden_samples) == self.n_perspectives
    self.hidden_samples = hidden_samples

    if verbose > 0:
        print(indent+' data details:')
        print(indent+f' number of perspectives : {self.n_perspectives}')
        print(indent+f' number of samples : {self.n_samples}')
        print(indent+' visualization details:')
        print(indent+' embedding dimension :', self.embedding_dimension)
        print(indent+f' image dimension : {self.image_dimension}')
        print(indent+f' visualization type : {visualization_method}')

    #setup sample labels:
    if sample_labels is not None:
        assert len(sample_labels) == self.n_samples
    self.sample_labels = sample_labels

    #setup perspective labels:
    if perspective_labels is None:
        perspective_labels = range(1, self.n_perspectives+1)
    else:
        assert len(perspective_labels) == self.n_perspectives
    self.perspective_labels = perspective_labels

    #setup colors:
    self.sample_colors = sample_colors
    self.image_colors = image_colors

    #setup visualization instances:
    self.visualization_instances = []
    self.visualization_method = visualization_method
    if isinstance(visualization_method, str):
        visualization_method = [visualization_method]*self.n_perspectives
    if visualization_args is None:
        visualization_args = {}
    if isinstance(visualization_args, dict):
        visualization_args = [visualization_args]*self.n_perspectives
    for i in range(self.n_perspectives):
        assert visualization_method[i] in ['mds', 'tsne']
        if self.verbose > 0:
            print(' setup visualization instance for perspective',
                  self.perspective_labels[i], ':')
        if visualization_method[i] == 'mds':
            vis = mds.MDS(self.distances[i],
                          weights=self.weights[i],
                          embedding_dimension=self.image_dimension,
                          verbose=self.verbose,
                          indent=self.indent+' ',
                          **visualization_args[i])
        elif visualization_method[i] == 'tsne':
            vis = tsne.TSNE(self.distances[i],
                            embedding_dimension=self.image_dimension,
                            verbose=self.verbose,
                            indent=self.indent+' ',
                            **visualization_args[i])
        self.visualization_instances.append(vis)
    self.visualization = self.visualization_instances

    #setup objectives:
    if total_cost_function == 'rms':
        self.total_cost_function = lambda individual_costs: \
            np.sqrt(np.sum(individual_costs**2)/self.n_perspectives)
    else:
        assert callable(total_cost_function)
        self.total_cost_function = total_cost_function

    def cost_function(X, Q, Y=None, **kwargs):
        """\
        Returns (total cost, array of individual costs) for embedding X
        and projections Q. The images Y are computed with
        self.proj.project(Q, X) unless they are supplied.
        """
        if Y is None:
            Y = self.proj.project(Q, X)
        individual_costs = np.zeros(self.n_perspectives)
        for k in range(self.n_perspectives):
            individual_costs[k] = \
                self.visualization[k].objective(Y[k], **kwargs)
        cost = self.total_cost_function(individual_costs)
        return cost, individual_costs
    self.cost_function = cost_function

    #setup gradient function:
    if self.projection_family == 'linear':
        def gradient(embedding, projections, batch_size=None, indices=None,
                     return_embedding=True, return_projections=True,
                     return_cost=True, return_individual_costs=False):
            """\
            Returns MPSE gradient(s), along with cost and individual costs
            (optional).

            Parameters
            ----------

            embedding : numpy array
                Current embedding.

            projections : numpy array
                Current projections (as a single array).

            batch_size, indices :
                Passed to the gradient of each visualization instance.

            return_embedding : boolean
                If True, returns MPSE gradient w.r.t. embedding.

            return_projections : boolean
                If True, returns MPSE gradient w.r.t. projections.

            return_cost : boolean
                Currently unused; the cost is always returned.

            return_individual_costs : boolean
                If True, returns individual embedding costs.
            """
            if return_embedding:
                dX = np.zeros(embedding.shape)
            if return_projections:
                dQ = []
            individual_costs = np.empty(self.n_perspectives)
            Y = self.proj.project(projections, embedding)
            for k in range(self.n_perspectives):
                dY_k, cost_k = self.visualization[k].gradient(
                    Y[k], batch_size=batch_size, indices=indices)
                individual_costs[k] = cost_k
                if return_embedding:
                    # Crop the projection to the block that the matrix
                    # product needs, (image dim, embedding dim). The
                    # bounds are read from the operands themselves, so
                    # this equals the former fixed [:2, :3] crop for the
                    # default dimensions and stays valid for others.
                    Q_k = projections[k][:dY_k.shape[1],
                                         :embedding.shape[1]]
                    dX += dY_k @ Q_k
                if return_projections:
                    dQ.append(dY_k.T @ embedding)
            if return_embedding:
                dX /= self.n_perspectives
            cost = self.total_cost_function(individual_costs)
            if return_embedding is False:
                grad = np.array(dQ)
            elif return_projections is False:
                grad = dX
            else:
                grad = [dX, np.array(dQ)]
            if return_individual_costs:
                return grad, cost, individual_costs
            else:
                return grad, cost
        self.gradient = gradient
    else:
        def gradient_X(X, Q, Y=None):
            """\
            Returns the gradient w.r.t. the embedding for a non-linear
            projection family.
            """
            # NOTE(review): the linear branch unpacks a (gradient, cost)
            # pair from visualization[k].gradient(); here its result is
            # used directly as a matrix - confirm this path still works.
            pgradient = self.proj.compute_gradient(X[0], params_list=Q)
            if Y is None:
                Y = self.proj.project(X, params_list=Q)
            gradient = np.zeros((self.n_samples, self.embedding_dimension))
            for k in range(self.n_perspectives):
                gradient += self.visualization[k].gradient(Y[k]) \
                    @ pgradient[k]
            return gradient
        self.gradient_X = gradient_X

    #set up initial embedding and projections (fixed optional):
    if verbose > 0:
        print(indent+' initialize:')

    #set fixed and initial embedding:
    if fixed_embedding is not None:
        if verbose > 0:
            print(indent+' fixed embedding : True')
        self.embedding = fixed_embedding
        self.initial_embedding = fixed_embedding
        self.fixed_embedding = True
    else:
        if verbose > 0:
            print(indent+' fixed embedding : False')
        if initial_embedding is None:
            if verbose > 0:
                print(indent+' initial embedding : random')
            self.initial_embedding = misc.initial_embedding(
                self.n_samples, dim=self.embedding_dimension,
                radius=1)
        else:
            assert isinstance(initial_embedding, np.ndarray)
            assert initial_embedding.shape == (
                self.n_samples, self.embedding_dimension)
            if verbose > 0:
                print(indent+' initial embedding : given')
            self.initial_embedding = initial_embedding
        self.embedding = self.initial_embedding
        self.fixed_embedding = False

    #set fixed and initial projections:
    if fixed_projections is not None:
        if isinstance(fixed_projections, str):
            fixed_projections = self.proj.generate(
                number=self.n_perspectives, method=fixed_projections)
        assert(all([isinstance(fp, np.ndarray) for fp in fixed_projections]))
        # Crop every projection to (image dim, embedding dim) using the
        # configured dimensions. For the defaults (2, 3) this is exactly
        # the former fixed [:2, :3] crop.
        if isinstance(image_dimension, (int, np.integer)):
            image_dimensions = [image_dimension]*len(fixed_projections)
        else:
            image_dimensions = list(image_dimension)
        fixed_projections = [
            f[:rows, :embedding_dimension]
            for f, rows in zip(fixed_projections, image_dimensions)]
        self.projections = fixed_projections
        self.initial_projections = fixed_projections
        self.fixed_projections = True
        if verbose > 0:
            print(indent+' fixed projections : True')
    else:
        if verbose > 0:
            print(indent+' fixed projections : False')
        if initial_projections is None:
            if verbose > 0:
                print(indent+' initial projections : random')
            self.initial_projections = self.proj.generate(
                number=self.n_perspectives, **kwargs)
        else:
            if verbose > 0:
                print(indent+' initial projections : given')
            if isinstance(initial_projections, str):
                initial_projections = self.proj.generate(
                    number=self.n_perspectives, method=initial_projections)
            self.initial_projections = initial_projections
        self.projections = self.initial_projections
        self.fixed_projections = False

    # Diagnostic dump of the projections; only shown in verbose mode,
    # like every other message printed here.
    if verbose > 0:
        print(indent+' Projection is:')
        print(self.projections)

    self.initial_cost = None
    self.initial_individual_cost = None
    self.computation_history = []
    self.time = 0
    self.update(**kwargs)
def __init__(self, data, dim=2, weights=None,
             estimate=True, safety=1e-4, normalize=True,
             initial_embedding='random',
             sample_colors=None, verbose=0, indent='', **kwargs):
    """\
    Initializes MDS object.

    Parameters:

    data : array or dictionary
    Distance/dissimilarity/feature data, passed to
    setup.setup_distances(). According to the original documentation it
    can have any of the following formats:
    1) a 1D condensed distance array
    2) a square distance matrix/array
    3) a feature array
    4) a dictionary describing a graph

    dim : int > 0
    Embedding dimension.

    weights : None or str or callable or array
    Weights to be used in defining MDS stress; resolved with
    setup.setup_weights().

    estimate : bool
    Stored as self.estimate (not used further here).

    safety : None or float in (0, 1e-2]
    If not None, every distance is raised to at least
    max(distances) * safety, and that floor is stored as
    self.minimum_distance. If None, no floor is applied.

    normalize : bool
    Passed as `normalize` to the stress and gradient computations.

    initial_embedding : numpy array or 'random' or None
    A given array must have shape (n_samples, dim). 'random' requests an
    embedding from misc.initial_embedding() with radius=1. With None, no
    embedding and no initial cost are set.

    sample_colors : array (optional)
    Stored as self.sample_colors; when None, the first n_samples entries
    of the distances are used instead.

    verbose : int >= 0
    Print status of methods in MDS object if verbose > 0.

    indent : str
    When printing, add indent before printing every new line.

    **kwargs
    Forwarded to setup.setup_distances(), to misc.initial_embedding()
    (when initial_embedding == 'random') and to self.objective (which
    ignores them).
    """
    self.verbose = verbose
    self.indent = indent
    if self.verbose > 0:
        print(self.indent + 'mview.MDS():')

    self.distances = setup.setup_distances(data, **kwargs)
    self.n_samples = scipy.spatial.distance.num_obs_y(self.distances)
    if safety is None:
        self.minimum_distance = None
    else:
        assert 0 < safety <= 1e-2
        # Floor the distances at a small fraction of the largest one.
        self.minimum_distance = np.max(self.distances) * safety
        self.distances = np.maximum(self.distances, self.minimum_distance)
    self.weights = setup.setup_weights(self.distances, weights=weights)
    self.normalize = normalize

    if sample_colors is None:
        self.sample_colors = self.distances[0:self.n_samples]
    else:
        self.sample_colors = sample_colors

    assert isinstance(dim, int)
    assert dim > 0
    self.dim = dim

    assert isinstance(estimate, bool)
    self.estimate = estimate

    # Extra keyword arguments are accepted and ignored so that callers
    # can use one calling convention for every objective.
    self.objective = lambda X, **kwargs: stress(
        self.distances, X, weights=self.weights,
        normalize=self.normalize)

    def gradient(embedding, batch_size=None, indices=None, **kwargs):
        """\
        Returns the result of full_gradient() when no batch size is given
        (or it covers all samples), and of batch_gradient() otherwise.
        """
        if batch_size is None or batch_size >= self.n_samples:
            return full_gradient(self.distances, embedding,
                                 weights=self.weights,
                                 normalize=self.normalize,
                                 minimum_distance=self.minimum_distance)
        else:
            return batch_gradient(self.distances, embedding,
                                  batch_size, indices,
                                  weights=self.weights,
                                  normalize=self.normalize,
                                  minimum_distance=self.minimum_distance)
    self.gradient = gradient

    if verbose > 0:
        print(indent + ' data details:')
        print(indent + f' number of samples : {self.n_samples}')
        if self.weights is None:
            print(indent + ' weighted : False')
        else:
            print(indent + ' weighted : True')
        print(indent + ' embedding details:')
        print(indent + f' embedding dimension : {self.dim}')

    #save or compute initial embedding
    # self.X0 is a snapshot copy, so that a later in-place change of
    # self.X cannot alter the recorded initial embedding.
    if isinstance(initial_embedding, np.ndarray):
        assert initial_embedding.shape == (self.n_samples, self.dim)
        if self.verbose > 0:
            print(' initial embedding : given')
        self.X = initial_embedding
        self.X0 = self.X.copy()
    elif initial_embedding == 'random':
        self.X = misc.initial_embedding(self.n_samples, dim=self.dim,
                                        radius=1, **kwargs)
        self.X0 = self.X.copy()
        if self.verbose > 0:
            print(' initial embedding : random')
    else:
        assert initial_embedding is None

    #save initial costs
    if initial_embedding is not None:
        self.initial_cost = self.objective(self.X0, **kwargs)
        self.cost = self.initial_cost
        if self.verbose > 0:
            print(f' initial stress : {self.cost:0.2e}')

    self.computation_history = []