def _stop_training(self, debug=False):
    ##### request the covariance matrices and clean up
    self.cov_mtx, self.avg, self.tlen = self._cov_mtx.fix()
    del self._cov_mtx
    # do not center around the mean:
    # we want the second moment matrix (centered about 0) and
    # not the second central moment matrix (centered about the mean), i.e.
    # the covariance matrix
    self.dcov_mtx, self.davg, self.dtlen = self._dcov_mtx.fix(center=False)
    del self._dcov_mtx

    rng = self._set_range()

    #### solve the generalized eigenvalue problem
    # the eigenvalues are already ordered in ascending order
    try:
        self.d, self.sf = self._symeig(self.dcov_mtx, self.cov_mtx,
                                       range=rng, overwrite=(not debug))
        d = self.d
        # check that we get only *positive* eigenvalues
        if d.min() < 0:
            err_msg = ("Got negative eigenvalues: %s."
                       " You may either set output_dim to be smaller,"
                       " or prepend the SFANode with a PCANode(reduce=True)"
                       " or PCANode(svd=True)" % str(d))
            raise NodeException(err_msg)
    except SymeigException as exception:
        errstr = str(exception) + "\n Covariance matrices may be singular."
        raise NodeException(errstr)
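# Hedged usage sketch (not part of the node): the error message above points
# to prepending a PCANode when the covariance matrices are singular. This
# assumes only the standard public mdp API (Flow, PCANode, SFANode).
import mdp
import numpy as np

x = np.random.random((1000, 20))
x[:, -1] = x[:, 0]  # deliberately make one input dimension redundant

# PCANode(svd=True, reduce=True) drops the redundant direction before SFA,
# keeping the generalized eigenvalue problem above well conditioned.
flow = mdp.Flow([mdp.nodes.PCANode(svd=True, reduce=True),
                 mdp.nodes.SFANode(output_dim=5)])
flow.train(x)
slow_features = flow(x)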
def set_filters(self, filters):
    if not isinstance(filters, numx.ndarray):
        raise NodeException("'filters' argument must be a numpy array")
    if filters.ndim != 3:
        raise NodeException("Filters must be specified in a 3-dim array, "
                            "with each filter on a different row")
    self._filters = filters
def _pre_execution_checks(self, x):
    """This method contains all pre-execution checks.

    It can be used when a subclass defines multiple execution methods.

    In this case, the output dimension depends on the type of
    convolution we use (padding, full, ...). Also, we want to be able
    to accept 3D arrays.
    """
    # check input rank
    if x.ndim not in [2, 3]:
        error_str = "x has rank %d, should be 2 or 3" % x.ndim
        raise NodeException(error_str)

    # set 2D shape if necessary
    if self._input_shape is None:
        if x.ndim == 2:
            error_str = ("Cannot infer 2D shape from 1D data points. "
                         "Data must have rank 3, or shape argument given.")
            raise NodeException(error_str)
        else:
            self._input_shape = x.shape[1:]

    # set the input dimension if necessary
    if self.input_dim is None:
        self.input_dim = numx.prod(self._input_shape)

    # set the dtype if necessary
    if self.dtype is None:
        self.dtype = x.dtype

    # check the input dimension
    if not numx.prod(x.shape[1:]) == self.input_dim:
        error_str = ("x has dimension %d, should be %d"
                     % (numx.prod(x.shape[1:]), self.input_dim))
        raise NodeException(error_str)

    # set output_dim if necessary
    if self.output_dim is None:
        input_shape = self.input_shape
        filters_shape = self.filters.shape
        if self.mode == 'same':
            self._output_shape = input_shape
        elif self.mode == 'full':
            self._output_shape = (input_shape[0] + filters_shape[1] - 1,
                                  input_shape[1] + filters_shape[2] - 1)
        else:  # mode == 'valid'
            self._output_shape = (input_shape[0] - filters_shape[1] + 1,
                                  input_shape[1] - filters_shape[2] + 1)
        self.output_dim = self.filters.shape[0] * numx.prod(self._output_shape)

    if x.shape[0] == 0:
        error_str = "x must have at least one observation (zero given)"
        raise NodeException(error_str)
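# Illustrative shape check (assumption: scipy is available, as the node
# relies on scipy.signal.convolve2d). The output_dim arithmetic above mirrors
# what convolve2d returns for each mode:
import numpy as np
from scipy.signal import convolve2d

image = np.random.random((16, 16))  # one (h, w) input point
filt = np.random.random((5, 5))     # one 2D filter

for mode in ('valid', 'same', 'full'):
    print(mode, convolve2d(image, filt, mode=mode).shape)
# valid -> (12, 12) == (16 - 5 + 1, 16 - 5 + 1)
# same  -> (16, 16)
# full  -> (20, 20) == (16 + 5 - 1, 16 + 5 - 1)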
def _stop_training(self, debug=False):
    ##### request the covariance matrices and clean up
    if hasattr(self, '_cov_mtx'):
        self.cov_mtx, self.avg, self.tlen = self._cov_mtx.fix()
        del self._cov_mtx
    # do not center around the mean:
    # we want the second moment matrix (centered about 0) and
    # not the second central moment matrix (centered about the mean), i.e.
    # the covariance matrix
    if hasattr(self, '_dcov_mtx'):
        self.dcov_mtx, self.davg, self.dtlen = self._dcov_mtx.fix(center=False)
        del self._dcov_mtx

    rng = self._set_range()

    #### solve the generalized eigenvalue problem
    # the eigenvalues are already ordered in ascending order
    try:
        try:
            # We first try to fulfill the extended signature described
            # in mdp.utils.symeig_semidefinite
            self.d, self.sf = self._symeig(
                self.dcov_mtx, self.cov_mtx, True, "on", rng,
                overwrite=(not debug),
                rank_threshold=self.rank_threshold, dfc_out=self)
        except TypeError:
            self.d, self.sf = self._symeig(
                self.dcov_mtx, self.cov_mtx, True, "on", rng,
                overwrite=(not debug))
        d = self.d
        # check that we get only *positive* eigenvalues
        if d.min() < 0:
            err_msg = ("Got negative eigenvalues: %s.\n"
                       "You may either set output_dim to be smaller,\n"
                       "or prepend the SFANode with a PCANode(reduce=True)\n"
                       "or PCANode(svd=True),\n"
                       "or set a rank deficit method, e.g.\n"
                       "create the SFA node with rank_deficit_method='auto'\n"
                       "and try higher values for rank_threshold, e.g. try\n"
                       "your_node.rank_threshold = 1e-10, 1e-8, 1e-6, ..."
                       % str(d))
            raise NodeException(err_msg)
    except SymeigException as exception:
        errstr = (str(exception) + "\n Covariance matrices may be singular.\n"
                  + SINGULAR_VALUE_MSG)
        raise NodeException(errstr)

    if not debug:
        # delete covariance matrices if no exception occurred
        del self.cov_mtx
        del self.dcov_mtx

    # store bias
    self._bias = mult(self.avg, self.sf)
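# Hedged usage sketch: rank_threshold and the rank deficit eigensolvers used
# above are configured through the SFANode constructor; the exact kwarg name
# (rank_deficit_method, present in newer MDP releases) is an assumption here.
import mdp
import numpy as np

x = np.random.random((2000, 10))
x[:, 9] = x[:, 0] + x[:, 1]  # make the input covariance rank deficient

sfa = mdp.nodes.SFANode(output_dim=4, rank_deficit_method='auto')
sfa.train(x)
sfa.stop_training()
y = sfa.execute(x)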
def _stop_training(self):
    try:
        inv_xTx = utils.inv(self._xTx +
                            self.ridge_param * mdp.numx.eye(self._input_dim + 1))
    except numx_linalg.LinAlgError as exception:
        errstr = (str(exception) +
                  "\n Input data may be redundant (i.e., some of the "
                  "variables may be linearly dependent).")
        raise NodeException(errstr)
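# What the try-block computes, spelled out in plain NumPy (a sketch; all
# variable names here are illustrative, not from the node): ridge regression
# inverts (X^T X + ridge_param * I) instead of X^T X, which keeps the system
# solvable when input variables are linearly dependent.
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
X = np.hstack([X, X[:, :1]])             # duplicate a column: X^T X is singular
Xb = np.hstack([X, np.ones((100, 1))])   # bias column, cf. _input_dim + 1 above
y = rng.normal(size=(100, 1))

ridge_param = 1e-3
beta = np.linalg.inv(Xb.T @ Xb + ridge_param * np.eye(Xb.shape[1])) @ Xb.T @ y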
def _stop_training(self):
    try:
        if self.use_pinv:
            invfun = utils.pinv
        else:
            invfun = utils.inv
        inv_xTx = invfun(self._xTx)
    except numx_linalg.LinAlgError as exception:
        errstr = (str(exception) +
                  "\n Input data may be redundant (i.e., some of the "
                  "variables may be linearly dependent).")
        raise NodeException(errstr)
def pseudo_inverse(self, y):
    """This function returns a pseudo-inverse of the execute frame.

    y == execute(x) is only ``True`` if y belongs to the domain of
    execute and has been computed with a sufficiently large x.
    If gap > 1 some of the last rows will be filled with zeros.

    :param y: The execute frame.
    :type y: numpy.ndarray

    :return: A pseudo-inverse of the given frame.
    :rtype: numpy.ndarray
    """
    self._if_training_stop_training()

    # set the output dimension if necessary
    if not self.output_dim:
        # if the input_dim is not defined, raise an exception
        if not self.input_dim:
            errstr = ("Number of input dimensions undefined. Inversion "
                      "not possible.")
            raise NodeException(errstr)
        self.output_dim = self.input_dim

    # control the dimension of y
    self._check_output(y)
    # cast
    y = self._refcast(y)

    gap = self.gap
    exp_length = y.shape[0]
    cols = self.input_dim
    rest = (self.time_frames - 1) * gap
    rows = exp_length + rest
    x = numx.zeros((rows, cols), dtype=self.dtype)
    x[:exp_length, :] = y[:, :cols]
    count = 1
    # Note that if gap > 1 some of the last rows will be filled with zeros!
    block_sz = min(gap, exp_length)
    for row in range(max(exp_length, gap), rows, gap):
        x[row:row + block_sz, :] = y[-block_sz:,
                                     count * cols:(count + 1) * cols]
        count += 1
    return x
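# Round-trip sketch for the method above, assuming the standard mdp
# TimeFramesNode API (time_frames, gap):
import mdp
import numpy as np

x = np.random.random((50, 2))
node = mdp.nodes.TimeFramesNode(time_frames=3, gap=2)
y = node.execute(x)             # shape (46, 6): 3 frames of 2 dims each
x_rec = node.pseudo_inverse(y)  # shape (50, 2); last rows zero-padded (gap > 1)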
def _pre_inversion_checks(self, y):
    """This method contains all pre-inversion checks.

    It can be used when a subclass defines multiple inversion methods.
    """
    if not self.is_invertible():
        raise IsNotInvertibleException("This node is not invertible.")
    # set the output dimension if necessary
    if self.output_dim is None:
        # if the input_dim is not defined, raise an exception
        if self.input_dim is None:
            errstr = ("Number of input dimensions undefined. Inversion "
                      "not possible.")
            raise NodeException(errstr)
        self.output_dim = self.input_dim
    # control the dimension of y
    self._check_output(y)
def __init__(self, noise_func=mdp.numx_rand.normal, noise_args=(0, 1), noise_type='additive', input_dim=None, output_dim=None, dtype=None): """Initializes an object of type 'NoiseNode'. :param noise_func: A function that generates noise. It must take a ``size`` keyword argument and return a random array of that size. Default is normal noise. :type noise_func: function :param noise_args: Tuple of additional arguments passed to `noise_func`. Default is (0,1) for (mean, standard deviation) of the normal distribution. :type noise_args: tuple :param noise_type: Either ``'additive'`` or ``'multiplicative'``. :type noise_type: str :param input_dim: The input dimensionality. :type input_dim: int :param output_dim: The output dimensionality. :type output_dim: int :param dtype: The datatype. :type dtype: numpy.dtype or str """ super(NoiseNode, self).__init__(input_dim=input_dim, output_dim=output_dim, dtype=dtype) self.noise_func = noise_func self.noise_args = noise_args valid_noise_types = ['additive', 'multiplicative'] if noise_type not in valid_noise_types: err_str = '%s is not a valid noise type' % str(noise_type) raise NodeException(err_str) else: self.noise_type = noise_type
def __init__(self, noise_func=mdp.numx_rand.normal, noise_args=(0, 1), noise_type='additive', input_dim=None, output_dim=None, dtype=None): """ Add noise to input signals. :Arguments: noise_func A function that generates noise. It must take a ``size`` keyword argument and return a random array of that size. Default is normal noise. noise_args Tuple of additional arguments passed to `noise_func`. Default is (0,1) for (mean, standard deviation) of the normal distribution. noise_type Either ``'additive'`` or ``'multiplicative'``. 'additive' returns ``x + noise``. 'multiplicative' returns ``x * (1 + noise)`` Default is ``'additive'``. """ super(NoiseNode, self).__init__(input_dim=input_dim, output_dim=output_dim, dtype=dtype) self.noise_func = noise_func self.noise_args = noise_args valid_noise_types = ['additive', 'multiplicative'] if noise_type not in valid_noise_types: err_str = '%s is not a valid noise type' % str(noise_type) raise NodeException(err_str) else: self.noise_type = noise_type
def generate_input(self, len_or_y=1, noise=False):
    """Generate data from the prior distribution.

    If the training phase has not been completed yet, call stop_training.

    :param len_or_y: If integer, it specifies the number of observations
        to generate. If array, it is used as a set of samples of the
        latent variables.

    :param noise: If true, generation includes the estimated noise.
    :type noise: bool

    :return: The generated data.
    :rtype: numpy.ndarray
    """
    self._if_training_stop_training()

    # set the output dimension if necessary
    if self.output_dim is None:
        # if the input_dim is not defined, raise an exception
        if self.input_dim is None:
            errstr = ("Number of input dimensions undefined. Inversion "
                      "not possible.")
            raise NodeException(errstr)
        self.output_dim = self.input_dim

    if isinstance(len_or_y, int):
        size = (len_or_y, self.output_dim)
        y = self._refcast(mdp.numx_rand.normal(size=size))
    else:
        y = self._refcast(len_or_y)
        self._check_output(y)

    res = mult(y, self.A.T) + self.mu
    if noise:
        ns = mdp.numx_rand.normal(size=(y.shape[0], self.input_dim))
        ns *= numx.sqrt(self.sigma)
        res += self._refcast(ns)
    return res
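# Sketch: fit an FANode and then sample new observations from the learned
# generative model y -> A y + mu (+ noise), as implemented above. The toy
# data construction is illustrative only.
import mdp
import numpy as np

# observed data generated from 2 latent factors in 6 dimensions
latent = np.random.normal(size=(2000, 2))
mixing = np.random.normal(size=(2, 6))
x = latent @ mixing + 0.1 * np.random.normal(size=(2000, 6))

fa = mdp.nodes.FANode(output_dim=2)
fa.train(x)
fa.stop_training()

samples = fa.generate_input(500, noise=True)  # 500 new observations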
def set_rank_deficit_method(self, rank_deficit_method):
    if rank_deficit_method == 'pca':
        self._symeig = symeig_semidefinite_pca
    elif rank_deficit_method == 'reg':
        self._symeig = symeig_semidefinite_reg
    elif rank_deficit_method == 'svd':
        self._symeig = symeig_semidefinite_svd
    elif rank_deficit_method == 'ldl':
        try:
            from scipy.linalg.lapack import dsytrf
        except ImportError:
            err_msg = ("ldl method for solving SFA with rank deficit "
                       "covariance requires at least SciPy 1.0.")
            raise NodeException(err_msg)
        self._symeig = symeig_semidefinite_ldl
    elif rank_deficit_method == 'auto':
        self._symeig = symeig_semidefinite_pca
    elif rank_deficit_method == 'none':
        self._symeig = symeig
    else:
        raise ValueError("Invalid value for rank_deficit_method: %s"
                         % str(rank_deficit_method))
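# Sketch of how this setter is typically reached: the SFANode constructor
# accepts the same strings via rank_deficit_method, and the method can also
# be called directly before training finishes (treat direct use as an
# assumption about the public API).
import mdp

sfa = mdp.nodes.SFANode(output_dim=3)
sfa.set_rank_deficit_method('reg')  # regularization-based symeig variant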
def __init__(self, filters, input_shape=None, approach='fft',
             mode='full', boundary='fill', fillvalue=0,
             output_2d=True, input_dim=None, dtype=None):
    """
    Input arguments:

    input_shape -- Is a tuple (h,w) that corresponds to the height and
                   width of the input 2D data. If the input data is given
                   in a flattened format, it is first reshaped before
                   convolution

    approach -- 'approach' is one of ['linear', 'fft']

                'linear': convolution is done by linear filtering;
                'fft': convolution is done using the Fourier Transform

                If 'approach' is 'fft', the 'boundary' and 'fillvalue'
                arguments are ignored, and are assumed to be 'fill' and 0,
                respectively.

                (*Default* = 'fft')

    mode -- Convolution mode, as defined in scipy.signal.convolve2d
            'mode' is one of ['valid', 'same', 'full']
            (*Default* = 'full')

    boundary -- Boundary condition, as defined in scipy.signal.convolve2d
                'boundary' is one of ['fill', 'wrap', 'symm']
                (*Default* = 'fill')

    fillvalue -- Value to fill pad input arrays with
                 (*Default* = 0)

    output_2d -- If True, the output array is 2D; the first index
                 corresponds to data points; every output data point
                 is the result of flattened convolution results, with
                 the output of each filter concatenated together.

                 If False, the output array is 4D; the format is
                 data[idx,filter_nr,x,y], with
                 filter_nr: index of convolution filter
                 idx: data point index
                 x, y: 2D coordinates
    """
    super(Convolution2DNode, self).__init__(input_dim=input_dim,
                                            dtype=dtype)
    self.filters = filters

    self._input_shape = input_shape

    if approach not in ['linear', 'fft']:
        raise NodeException(
            "'approach' argument must be one of ['linear', 'fft']")
    self._approach = approach

    if mode not in ['valid', 'same', 'full']:
        raise NodeException(
            "'mode' argument must be one of ['valid', 'same', 'full']")
    self._mode = mode

    self.boundary = boundary
    self.fillvalue = fillvalue
    self.output_2d = output_2d
    self._output_shape = None
def __init__(self, lags=1, sfa_ica_coeff=(1., 1.), icaweights=None,
             sfaweights=None, whitened=False, white_comp=None,
             white_parm=None, eps_contrast=1e-6, max_iter=10000,
             RP=None, verbose=False,
             input_dim=None, output_dim=None, dtype=None):
    """
    Perform Independent Slow Feature Analysis.

    The notation is the same used in the paper by Blaschke et al.
    Please refer to the paper for more information.

    :Parameters:
      lags
        list of time-lags to generate the time-delayed covariance
        matrices (in the paper this is the set of \\tau). If lags is an
        integer, time-lags 1,2,...,'lags' are used.
        Note that time-lag == 0 (instantaneous correlation) is always
        implicitly used.

      sfa_ica_coeff
        a list of floats with two entries, which defines the weights of
        the SFA and ICA part of the objective function. They are called
        b_{SFA} and b_{ICA} in the paper.

      sfaweights
        weighting factors for the covariance matrices relative to the
        SFA part of the objective function (called \\kappa_{SFA}^{\\tau}
        in the paper). Default is [1., 0., ..., 0.]
        For possible values see the description of icaweights.

      icaweights
        weighting factors for the cov matrices relative to the ICA part
        of the objective function (called \\kappa_{ICA}^{\\tau} in the
        paper). Default is 1.
        Possible values are:

        - an integer ``n``: all matrices are weighted the same
          (note that it does not make sense to have ``n != 1``)
        - a list or array of floats of ``len == len(lags)``: each
          element of the list is used for weighting the corresponding
          matrix
        - ``None``: use the default values.

      whitened
        ``True`` if input data is already white, ``False`` otherwise
        (the data will be whitened internally).

      white_comp
        If whitened is false, you can set ``white_comp`` to the number
        of whitened components to keep during the calculation (i.e.,
        the input dimensions are reduced to ``white_comp`` by keeping
        the components of largest variance).

      white_parm
        a dictionary with additional parameters for whitening.
        It is passed directly to the WhiteningNode constructor.
        Ex: white_parm = { 'svd' : True }

      eps_contrast
        Convergence is achieved when the relative improvement in the
        contrast is below this threshold. Values in the range
        [1E-4, 1E-10] are usually reasonable.

      max_iter
        If the algorithm does not achieve convergence within max_iter
        iterations, raise an Exception. Should be larger than 100.

      RP
        Starting rotation-permutation matrix. It is an
        input_dim x input_dim matrix used to initially rotate the
        input components. If not set, the identity matrix is used.
        In the paper this is used to start the algorithm at the SFA
        solution (which is often quite near to the optimum).

      verbose
        print progress information during convergence. This can slow
        down the algorithm, but it's the only way to see the rate of
        improvement and immediately spot if something is going wrong.

      output_dim
        sets the number of independent components that have to be
        extracted. Note that if this is not smaller than input_dim,
        the problem is solved linearly and SFA would give the same
        solution only much faster.
    """
    # check that the "lags" argument has some meaningful value
    if isinstance(lags, int):
        lags = list(range(1, lags + 1))
    elif isinstance(lags, (list, tuple)):
        lags = numx.array(lags, "i")
    elif isinstance(lags, numx.ndarray):
        if not (lags.dtype.char in ['i', 'l']):
            err_str = "lags must be integer!"
            raise NodeException(err_str)
    else:
        err_str = ("Lags must be int, list or array. Found "
                   "%s!" % (type(lags).__name__))
        raise NodeException(err_str)
    self.lags = lags

    # sanity checks for weights
    if icaweights is None:
        self.icaweights = 1.
    else:
        if len(icaweights) != len(lags):
            err = ("icaweights vector length is %d, "
                   "should be %d" % (len(icaweights), len(lags)))
            raise NodeException(err)
        self.icaweights = icaweights
    if sfaweights is None:
        self.sfaweights = [0] * len(lags)
        self.sfaweights[0] = 1.
    else:
        if len(sfaweights) != len(lags):
            err = ("sfaweights vector length is %d, "
                   "should be %d" % (len(sfaweights), len(lags)))
            raise NodeException(err)
        self.sfaweights = sfaweights

    # store attributes
    self.sfa_ica_coeff = sfa_ica_coeff
    self.max_iter = max_iter
    self.verbose = verbose
    self.eps_contrast = eps_contrast

    # if input is not white, insert a WhiteningNode
    self.whitened = whitened
    if not whitened:
        if white_parm is None:
            white_parm = {}
        if output_dim is not None:
            white_comp = output_dim
        elif white_comp is not None:
            output_dim = white_comp
        self.white = WhiteningNode(input_dim=input_dim,
                                   output_dim=white_comp,
                                   dtype=dtype, **white_parm)

    # initialize covariance matrices
    self.covs = [DelayCovarianceMatrix(dt, dtype=dtype) for dt in lags]

    # initialize the global rotation-permutation matrix
    # if not set, it will default to the identity matrix
    self.RP = RP

    # initialize verbose structure to print nice and useful progress info
    if verbose:
        info = {'sweep': max(len(str(self.max_iter)), 5),
                'perturbe': max(len(str(self.max_iter)), 5),
                'float': 5 + 8,
                'fmt': "%.5e",
                'sep': " | "}
        f1 = "Sweep".center(info['sweep'])
        f1_2 = "Pertb".center(info['perturbe'])
        f2 = "SFA part".center(info['float'])
        f3 = "ICA part".center(info['float'])
        f4 = "Contrast".center(info['float'])
        header = info['sep'].join([f1, f1_2, f2, f3, f4])
        info['header'] = header + '\n'
        info['line'] = len(header) * "-"
        self._info = info

    # finally call base class constructor
    super(ISFANode, self).__init__(input_dim, output_dim, dtype)
def _optimize(self):
    # optimize the contrast function

    # save initial contrast
    sfa, ica = self._get_contrast(self.covs)
    self.initial_contrast = {'SFA': sfa, 'ICA': ica, 'TOT': sfa + ica}
    # info headers
    if self.verbose:
        print(self._info['header'] + self._info['line'])

    # initialize control variables
    # contrast
    contrast = sfa + ica
    # local rotation matrix
    Q = self._get_eye()
    # local copy of the correlation matrices
    covs = self.covs.copy()
    # maximum improvement in the contrast function
    max_increase = self.eps_contrast
    # number of sweeps
    sweep = 0
    # flag for stopping sweeping
    sweeping = True
    # flag to check if we already perturbed the outer space
    # - negative means that we exit from this routine
    #   because we hit numerical precision or because
    #   there's no outer space to be perturbed (input_dim == output_dim)
    # - positive means the number of perturbations done
    #   before finding no further improvement
    perturbed = 0

    # size of the perturbation matrix
    psize = self._effective_input_dim - self.output_dim

    # if there is no outer space don't perturb
    if self._effective_input_dim == self.output_dim:
        perturbed = -1

    # local eye matrix
    eye = self._get_eye()

    # main loop
    # we'll keep on sweeping until the contrast has improved less
    # than self.eps_contrast
    part_sweep = 0
    while sweeping:
        # update number of sweeps
        sweep += 1

        # perform a single sweep
        max_increase, covs, Q, contrast = self._do_sweep(covs, Q, contrast)

        if max_increase < 0 or contrast == 0:
            # we hit numerical precision, exit!
            sweeping = False
            if perturbed == 0:
                perturbed = -1
            else:
                perturbed = -perturbed

        if (max_increase < self.eps_contrast) and (max_increase >= 0):
            # rate of change is small for all pairs in a sweep
            if perturbed == 0:
                # perturb the outer space one time with a random rotation
                perturbed = 1
            elif perturbed >= 1 and part_sweep == sweep - 1:
                # after the last perturbation no useful step has
                # been done. exit!
                sweeping = False
            elif perturbed < 0:
                # we can't perturb anymore
                sweeping = False
            # keep track of the last sweep we perturbed
            part_sweep = sweep

        # perform perturbation if needed
        if perturbed >= 1 and sweeping is True:
            # generate a random rotation matrix for the external subspace
            PRT = eye.copy()
            rot = self._get_rnd_rotation(psize)
            # generate a random permutation matrix for the ext. subspace
            perm = self._get_rnd_permutation(psize)
            # combine rotation and permutation
            rot_perm = mult(rot, perm)
            # apply rotation+permutation
            PRT[self.output_dim:, self.output_dim:] = rot_perm
            covs.transform(PRT)
            Q = mult(Q, PRT)
            # increment perturbation counter
            perturbed += 1

        # verbose progress information
        if self.verbose:
            table_entry = self._fmt_prog_info(sweep, perturbed, contrast)
            _sys.stdout.write(table_entry + len(table_entry) * '\b')
            _sys.stdout.flush()

        # if we made too many sweeps exit with error!
        if sweep == self.max_iter:
            err_str = ("Failed to converge, maximum increase= "
                       "%.5e" % max_increase)
            raise NodeException(err_str)

    # if we land here, we have converged!
    # calculate output contrast
    sfa, ica = self._get_contrast(covs)
    contrast = sfa + ica
    # print final information
    if self.verbose:
        print(self._fmt_prog_info(sweep, perturbed, contrast, sfa, ica))
        print(self._info['line'])

    self.final_contrast = {'SFA': sfa, 'ICA': ica, 'TOT': sfa + ica}

    # finally return the optimal rotation matrix
    return Q
def _stop_training(self, debug=False):
    # debug argument is ignored but needed by the base class
    super(NIPALSNode, self)._stop_training()
    self._adjust_output_dim()

    if self.desired_variance is not None:
        des_var = True
    else:
        des_var = False

    X = self.data
    conv = self.conv
    dtype = self.dtype
    mean = X.mean(axis=0)
    self.avg = mean
    max_it = self.max_it
    tlen = self.tlen

    # remove mean
    X -= mean
    var = X.var(axis=0).sum()
    self.total_variance = var
    exp_var = 0

    eigenv = numx.zeros((self.input_dim, self.input_dim), dtype=dtype)
    d = numx.zeros((self.input_dim,), dtype=dtype)
    for i in range(self.input_dim):
        it = 0
        # first score vector t is initialized to first column in X
        t = X[:, 0]
        # initialize difference
        diff = conv + 1
        while diff > conv:
            # increase iteration counter
            it += 1
            # Project X onto t to find corresponding loading p
            # and normalize loading vector p to length 1
            p = old_div(mult(X.T, t), mult(t, t))
            p /= sqrt(mult(p, p))
            # project X onto p to find corresponding score vector t_new
            t_new = mult(X, p)
            # difference between new and old score vector
            tdiff = t_new - t
            diff = (tdiff * tdiff).sum()
            t = t_new
            if it > max_it:
                msg = ('PC#%d: no convergence after'
                       ' %d iterations.' % (i, max_it))
                raise NodeException(msg)

        # store ith eigenvector in result matrix
        eigenv[i, :] = p
        # remove the estimated principal component from X
        D = numx.outer(t, p)
        X -= D
        D = mult(D, p)
        d[i] = old_div((D * D).sum(), (tlen - 1))
        exp_var += old_div(d[i], var)
        if des_var and (exp_var >= self.desired_variance):
            self.output_dim = i + 1
            break

    self.d = d[:self.output_dim]
    self.v = eigenv[:self.output_dim, :].T
    self.explained_variance = exp_var
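# Usage sketch: because NIPALS extracts components one at a time, it can stop
# early once desired_variance is reached. Following the PCANode convention
# (an assumption for NIPALSNode), a float output_dim in (0, 1] is read as the
# fraction of total variance to keep, which sets desired_variance above.
import mdp
import numpy as np

x = np.random.random((500, 20))
nipals = mdp.nodes.NIPALSNode(output_dim=0.9)  # keep 90% of the variance
nipals.train(x)
nipals.stop_training()
y = nipals.execute(x)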
def set_boundary(self, boundary):
    if boundary not in ['fill', 'wrap', 'symm']:
        raise NodeException(
            "'boundary' argument must be one of ['fill', 'wrap', 'symm']")
    self._boundary = boundary
def _set_output_dim(self, n):
    msg = 'Output dim can not be explicitly set!'
    raise NodeException(msg)
def __init__(self, filters, input_shape=None, approach='fft',
             mode='full', boundary='fill', fillvalue=0,
             output_2d=True, input_dim=None, dtype=None):
    """Initializes an object of type 'Convolution2DNode'.

    :param filters: Specifies a set of 2D filters that are convolved
        with the input data during execution.
    :type filters: numpy.ndarray

    :param input_shape: Is a tuple (h,w) that corresponds to the height
        and width of the input 2D data. If the input data is given in a
        flattened format, it is first reshaped before convolution
    :type input_shape: tuple

    :param approach: 'approach' is one of ['linear', 'fft']

        - 'linear': convolution is done by linear filtering;
        - 'fft': convolution is done using the Fourier Transform

        If 'approach' is 'fft', the 'boundary' and 'fillvalue' arguments
        are ignored, and are assumed to be 'fill' and 0, respectively.
        (*Default* = 'fft')
    :type approach: str

    :param mode: Convolution mode, as defined in scipy.signal.convolve2d
        'mode' is one of ['valid', 'same', 'full']
        (*Default* = 'full')
    :type mode: str

    :param boundary: Boundary condition, as defined in
        scipy.signal.convolve2d
        'boundary' is one of ['fill', 'wrap', 'symm']
        (*Default* = 'fill')
    :type boundary: str

    :param fillvalue: Value to fill pad input arrays with
        (*Default* = 0)
    :type fillvalue: numeric

    :param output_2d: If True, the output array is 2D; the first index
        corresponds to data points; every output data point is the result
        of flattened convolution results, with the output of each filter
        concatenated together.

        If False, the output array is 4D; the format is
        data[idx,filter_nr,x,y], with

        - filter_nr: index of convolution filter
        - idx: data point index
        - x, y: 2D coordinates
    :type output_2d: bool

    :param input_dim: The input dimensionality.
    :type input_dim: int

    :param dtype: The datatype.
    :type dtype: numpy.dtype or str
    """
    super(Convolution2DNode, self).__init__(input_dim=input_dim,
                                            dtype=dtype)
    self.filters = filters

    self._input_shape = input_shape

    if approach not in ['linear', 'fft']:
        raise NodeException(
            "'approach' argument must be one of ['linear', 'fft']")
    self._approach = approach

    if mode not in ['valid', 'same', 'full']:
        raise NodeException(
            "'mode' argument must be one of ['valid', 'same', 'full']")
    self._mode = mode

    self.boundary = boundary
    self.fillvalue = fillvalue
    self.output_2d = output_2d
    self._output_shape = None
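# Usage sketch for the constructor above (filters is a 3D array with one 2D
# filter per first index, cf. set_filters; the node needs no training):
import mdp
import numpy as np

images = np.random.random((10, 16, 16))  # 10 data points of shape (16, 16)
filters = np.random.random((4, 5, 5))    # 4 filters of shape (5, 5)

conv = mdp.nodes.Convolution2DNode(filters, mode='valid', output_2d=False)
out = conv.execute(images)               # shape (10, 4, 12, 12)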
def _stop_training(self):
    #### some definitions
    verbose = self.verbose
    typ = self.dtype
    tol = self.tol
    d = self.input_dim
    # if the number of latent variables is not specified,
    # set it equal to the number of input components
    if not self.output_dim:
        self.output_dim = d
    k = self.output_dim
    # indices of the diagonal elements of a dxd or kxk matrix
    idx_diag_d = [i * (d + 1) for i in range(d)]
    idx_diag_k = [i * (k + 1) for i in range(k)]

    # constant term in front of the log-likelihood
    const = -d / 2. * numx.log(2. * numx.pi)

    ##### request the covariance matrix and clean up
    cov_mtx, mu, tlen = self._cov_mtx.fix()
    del self._cov_mtx
    cov_diag = cov_mtx.diagonal()

    ##### initialize the parameters
    # noise variances
    sigma = cov_diag
    # loading factors
    # Zoubin uses the determinant of cov_mtx^1/d as scale, but it's
    # too slow for large matrices. Is the product of the diagonal a good
    # approximation?
    if d <= 300:
        scale = det(cov_mtx)**(1. / d)
    else:
        scale = numx.product(sigma)**(1. / d)
    if scale <= 0.:
        err = ("The covariance matrix of the data is singular. "
               "Redundant dimensions need to be removed.")
        raise NodeException(err)

    A = normal(0., sqrt(scale / k), size=(d, k)).astype(typ)

    ##### EM-cycle
    lhood_curve = []
    base_lhood = None
    old_lhood = -numx.inf
    for t in range(self.max_cycles):
        ## compute B = (A A^T + Sigma)^-1
        B = mult(A, A.T)
        # B += diag(sigma), avoid computing diag(sigma) which is dxd
        B.ravel().put(idx_diag_d, B.ravel().take(idx_diag_d) + sigma)
        # this quantity is used later for the log-likelihood
        # abs is there to avoid numerical errors when det < 0
        log_det_B = numx.log(abs(det(B)))
        # end the computation of B
        B = inv(B)

        ## other useful quantities
        trA_B = mult(A.T, B)
        trA_B_cov_mtx = mult(trA_B, cov_mtx)

        ##### E-step
        ## E_yyT = E(y_n y_n^T | x_n)
        E_yyT = -mult(trA_B, A) + mult(trA_B_cov_mtx, trA_B.T)
        # E_yyT += numx.eye(k)
        E_yyT.ravel().put(idx_diag_k, E_yyT.ravel().take(idx_diag_k) + 1.)

        ##### M-step
        A = mult(trA_B_cov_mtx.T, inv(E_yyT))
        sigma = cov_diag - (mult(A, trA_B_cov_mtx)).diagonal()

        ##### log-likelihood
        trace_B_cov = (B * cov_mtx.T).sum()
        # this is actually likelihood/tlen
        lhood = const - 0.5 * log_det_B - 0.5 * trace_B_cov
        if verbose:
            print('cycle', t, 'log-lhood:', lhood)

        ##### convergence criterion
        if base_lhood is None:
            base_lhood = lhood
        else:
            # convergence criterion
            if (lhood - base_lhood) < (1. + tol) * (old_lhood - base_lhood):
                break
            if lhood < old_lhood:
                # this should never happen.
                # it sometimes does, e.g. if the noise is extremely low,
                # because of numerical rounding effects
                warnings.warn(_LHOOD_WARNING, mdp.MDPWarning)
        old_lhood = lhood
        lhood_curve.append(lhood)

    self.tlen = tlen
    self.A = A
    self.mu = mu.reshape(1, d)
    self.sigma = sigma

    ## MAP matrix
    # compute B = (A A^T + Sigma)^-1
    B = mult(A, A.T).copy()
    B.ravel().put(idx_diag_d, B.ravel().take(idx_diag_d) + sigma)
    B = inv(B)
    self.E_y_mtx = mult(B.T, A)

    self.lhood = lhood_curve