def _sample_v(self, h, sample_l=False, concatenate=True):
    # returns P(v=1|h,W,b), a sample from it, P(l=1|h,W,b),
    # and a sample from it
    ldim, vdim = self._labels_dim, self._visible_dim

    # activation
    a = self.bv + mult(h, self.w.T)
    av, al = a[:, :vdim], a[:, vdim:]

    # ## visible units: logistic activation
    probs_v = old_div(1., (1. + exp(-av)))
    v = (probs_v > random(probs_v.shape)).astype('d')

    # ## label units: softmax activation
    # subtract maximum to regularize exponent
    exponent = al - rrep(al.max(axis=1), ldim)
    probs_l = exp(exponent)
    probs_l /= rrep(probs_l.sum(axis=1), ldim)

    if sample_l:
        # ?? todo: I'm sure this can be optimized
        l = numx.zeros((h.shape[0], ldim))
        for t in range(h.shape[0]):
            l[t, :] = mdp.numx_rand.multinomial(1, probs_l[t, :])
    else:
        l = probs_l.copy()

    if concatenate:
        probs = numx.concatenate((probs_v, probs_l), axis=1)
        x = numx.concatenate((v, l), axis=1)
        return probs, x
    else:
        return probs_v, probs_l, v, l
def _stop_training(self):
    """Organize the sample data."""
    ordered_samples = []
    for label in self._label_samples:
        ordered_samples.append(
            numx.concatenate(self._label_samples[label]))
        self.ordered_labels.append(label)
    del self._label_samples
    self.samples = numx.concatenate(ordered_samples)
    self.n_samples = len(self.samples)
    self.sample_label_indices = numx.concatenate(
        [numx.ones(len(ordered_samples[i]), dtype="int32") * i
         for i in range(len(self.ordered_labels))])
def train(self, v, l, n_updates=1, epsilon=0.1, decay=0.0, momentum=0.0,
          verbose=False):
    """Update the internal structures according to the visible data `v`
    and the labels `l`. The training is performed using
    Contrastive Divergence (CD).

    :Parameters:
      v
        a binary matrix having different variables on different columns
        and observations on the rows
      l
        a binary matrix having different variables on different columns
        and observations on the rows. Only one value per row should be 1.
      n_updates
        number of CD iterations. Default value: 1
      epsilon
        learning rate. Default value: 0.1
      decay
        weight decay term. Default value: 0.
      momentum
        momentum term. Default value: 0.
    """
    if not self.is_training():
        errstr = "The training phase has already finished."
        raise mdp.TrainingFinishedException(errstr)

    x = numx.concatenate((v, l), axis=1)
    self._check_input(x)
    self._train_phase_started = True

    self._train_seq[self._train_phase][0](self._refcast(x),
                                          n_updates=n_updates,
                                          epsilon=epsilon,
                                          decay=decay,
                                          momentum=momentum,
                                          verbose=verbose)
def execute(self, v, l, return_probs=True):
    """If `return_probs` is True, returns the probability of the hidden
    variables h[n,i] being 1 given the observations v[n,:] and l[n,:].
    If `return_probs` is False, return a sample from that probability.

    :param v: A binary matrix having different variables on different
        columns and observations on the rows.
    :type v: numpy.ndarray

    :param l: The labels. A binary matrix having different variables on
        different columns and observations on the rows.
        Only one value per row should be 1.
    :type l: numpy.ndarray

    :param return_probs: Controls the return value. Default value: True
    :type return_probs: bool

    :return: The probability of the hidden variables being 1 given the
        observations and labels, or a sample from that probability.
    :rtype: numpy.ndarray
    """
    x = numx.concatenate((v, l), axis=1)
    self._pre_execution_checks(x)
    probs, h = self._sample_h(self._refcast(x))
    if return_probs:
        return probs
    else:
        return h
def _join(self, forked_node):
    if (self.data_hist is not None) and (forked_node.data_hist is not None):
        self.data_hist = numx.concatenate([self.data_hist,
                                           forked_node.data_hist])
    elif forked_node.data_hist is not None:
        self.data_hist = forked_node.data_hist
def _calculate_gradient(self, y):
    x = self._last_x
    dy = Oger.utils.LogisticFunction.df(x, self._last_y) * y
    dw = mult(x.T, dy)
    self._gradient_vector = numx.concatenate((dw.ravel(), dy.sum(axis=0)))
    dx = mult(self.w, dy.T).T
    return dx
def use_results(self, results):
    """Use the result from the scheduler.

    During parallel training this will start the next training phase.
    For parallel execution this will return the result, like a normal
    execute would.

    results -- Iterable containing the results, normally the return value
        of scheduler.ResultContainer.get_results().
        The individual results can be the return values of the tasks.
    """
    if self.is_parallel_training:
        for result in results:
            # the flownode contains the original nodes
            self._flownode.join(result)
        if self.verbose:
            print("finished parallel training phase of node no. " +
                  "%d in parallel flow" % (self._i_train_node + 1))
        self._stop_training_hook()
        self._flownode.stop_training()
        self._post_stop_training_hook()
        if not self.flow[self._i_train_node].is_training():
            self._i_train_node += 1
        self._next_train_phase()
    elif self.is_parallel_executing:
        self._exec_data_iterator = None
        ys = [result[0] for result in results]
        if self._flownode.use_execute_fork():
            flownodes = [result[1] for result in results]
            for flownode in flownodes:
                if flownode is not None:
                    self._flownode.join(flownode)
        return n.concatenate(ys)
def inverse(self, iterable):
    """Process the data through all nodes in the flow backwards
    (starting from the last node up to the first node) by calling the
    inverse function of each node. Of course, all nodes in the flow
    must be invertible.

    'iterable' is an iterable or iterator (note that a list is also an
    iterable), which returns data arrays that are used as input to the
    flow. Alternatively, one can specify one data array as input.

    Note that this is _not_ equivalent to 'flow[::-1](iterable)',
    which also executes the flow backwards but calls the 'execute'
    function of each node."""
    if isinstance(iterable, numx.ndarray):
        return self._inverse_seq(iterable)
    res = []
    empty_iterator = True
    for x in iterable:
        empty_iterator = False
        res.append(self._inverse_seq(x))
    if empty_iterator:
        errstr = "The inverse data iterator is empty."
        raise FlowException(errstr)
    return numx.concatenate(res)
def _add_constant(self, x):
    """Add a constant term to the vector 'x'.

    x -> [1 x]
    """
    return numx.concatenate((numx.ones((x.shape[0], 1),
                                       dtype=self.dtype), x), axis=1)
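# The "x -> [1 x]" augmentation above simply prepends a column of ones so
# that a bias term can be absorbed into the weight matrix.  A minimal
# stand-alone sketch with plain NumPy; the helper name is illustrative,
# not part of the library:
import numpy as np

def add_constant(x):
    """Prepend a column of ones: x -> [1 x]."""
    return np.concatenate((np.ones((x.shape[0], 1), dtype=x.dtype), x), axis=1)

x = np.arange(6.0).reshape(3, 2)
print(add_constant(x))
# [[1. 0. 1.]
#  [1. 2. 3.]
#  [1. 4. 5.]]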
def _train(self, x):
    """Store the history data."""
    if self.hist_fraction < 1.0:
        x = x[numx.random.random(len(x)) < self.hist_fraction]
    if self.data_hist is not None:
        self.data_hist = numx.concatenate([self.data_hist, x])
    else:
        self.data_hist = x
def _calculate_gradient(self, y):
    """`y` is the gradient that is propagated from the previous layer."""
    x = self._last_x
    dy = self.transfer_func.df(x, self._last_y) * y
    dw = mult(x.T, dy)
    self._gradient_vector = numx.concatenate((dw.ravel(), dy.sum(axis=0)))
    dx = mult(self.w, dy.T).T
    return dx
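# A plain-NumPy sketch of the backprop step performed by _calculate_gradient
# above for a logistic layer y = sigmoid(x.dot(w) + b): the incoming gradient
# is scaled by the sigmoid derivative y * (1 - y), the weight gradient is
# x.T.dot(dy), the bias gradient is the column sum of dy, and dy.dot(w.T) is
# handed to the previous layer.  Shapes and data are illustrative only.
import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(8, 3)                          # batch of layer inputs
w = rng.randn(3, 2)                          # layer weights
b = np.zeros(2)                              # layer bias
y = 1.0 / (1.0 + np.exp(-(x.dot(w) + b)))    # forward pass

g = rng.randn(8, 2)                          # gradient from the next layer
dy = y * (1.0 - y) * g                       # through the logistic nonlinearity
dw = x.T.dot(dy)                             # (3, 2) weight gradient
db = dy.sum(axis=0)                          # (2,)  bias gradient
dx = dy.dot(w.T)                             # (8, 3) gradient for previous layer
gradient_vector = np.concatenate((dw.ravel(), db))   # flattened, as above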
def _params(self):
    """Return the current parameters of the nodes."""
    params = numx.array([])
    for n in traverseHinet(self.gflow):
        if hasattr(n, '_param_size') and n._param_size() > 0:
            params = numx.concatenate((params, n.params()))
    return params
def sample_h(self, v, l):
    """Sample the hidden variables given observations `v` and labels `l`.

    :Returns: a tuple ``(prob_h, h)``, where ``prob_h[n,i]`` is the
        probability that variable ``i`` is one given the observations
        ``v[n,:]`` and the labels ``l[n,:]``, and ``h[n,i]`` is a sample
        from the posterior probability."""
    x = numx.concatenate((v, l), axis=1)
    self._pre_execution_checks(x)
    return self._sample_h(x)
def _add_constant(self, x):
    """Add a constant term to the vector 'x'.

    x -> [1 x]

    :param x: The vector to which a constant column of ones is prepended.
    :type x: numpy.ndarray

    :return: The altered vector.
    :rtype: numpy.ndarray
    """
    return numx.concatenate((numx.ones((x.shape[0], 1),
                                       dtype=self.dtype), x), axis=1)
def _gradient(self):
    """Get the gradient with respect to the parameters.

    This gradient has been calculated during the last backprop sweep.
    """
    gradient = numx.array([])
    for n in traverseHinet(self.gflow):
        if hasattr(n, '_param_size') and n._param_size() > 0:
            gradient = numx.concatenate((gradient, n.gradient()))
    return gradient
def train(self, v, l, n_updates=1, epsilon=0.1, decay=0., momentum=0.,
          verbose=False):
    """Update the internal structures according to the visible data `v`
    and the labels `l`. The training is performed using
    Contrastive Divergence (CD).

    :param v: A binary matrix having different variables on different
        columns and observations on the rows.
    :type v: numpy.ndarray

    :param l: A binary matrix having different variables on different
        columns and observations on the rows.
        Only one value per row should be 1.
    :type l: numpy.ndarray

    :param n_updates: Number of CD iterations. Default value: 1
    :type n_updates: int

    :param epsilon: Learning rate. Default value: 0.1
    :type epsilon: float

    :param decay: Weight decay term. Default value: 0.
    :type decay: float

    :param momentum: Momentum term. Default value: 0.
    :type momentum: float

    :param verbose: Controls the verbosity.
    :type verbose: bool
    """
    if not self.is_training():
        errstr = "The training phase has already finished."
        raise mdp.TrainingFinishedException(errstr)

    x = numx.concatenate((v, l), axis=1)
    self._check_input(x)
    self._train_phase_started = True

    self._train_seq[self._train_phase][0](self._refcast(x),
                                          n_updates=n_updates,
                                          epsilon=epsilon,
                                          decay=decay,
                                          momentum=momentum,
                                          verbose=verbose)
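# Usage sketch for the train/execute pair above.  It mirrors
# mdp.nodes.RBMWithLabelsNode; the constructor arguments
# (hidden_dim, labels_dim, visible_dim) are assumed here, and all
# dimensions and hyperparameters are illustrative only.
import numpy as np
import mdp

rng = np.random.RandomState(0)
v = (rng.rand(100, 20) > 0.5).astype('d')       # binary observations
labels = np.eye(4)[rng.randint(0, 4, 100)]      # one-hot labels, one 1 per row

rbm = mdp.nodes.RBMWithLabelsNode(hidden_dim=10, labels_dim=4, visible_dim=20)
for _ in range(10):                             # a few CD sweeps over the data
    rbm.train(v, labels, n_updates=1, epsilon=0.1, momentum=0.5)
rbm.stop_training()

probs_h = rbm.execute(v, labels)                # P(h=1 | v, l)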
def switching_signals(f1, f2, T, n_switches, n_samples=1):
    samples = []
    # seconds per simulation timestep
    t = numx.arange(T)
    proto_1 = numx.atleast_2d(numx.sin(2 * numx.pi * t * f1)).T
    proto_2 = numx.atleast_2d(numx.sin(2 * numx.pi * t * f2)).T

    for _ in range(n_samples):
        n_periods1 = numx.random.randint(4, 8, size=(n_switches))
        n_periods2 = numx.random.randint(4, 8, size=(n_switches))
        # n_periods1, n_periods2 = [1], [0]
        switch = []
        signal = []
        for p1, p2 in zip(n_periods1, n_periods2):
            switch.extend([numx.ones_like(proto_1)] * p1)
            switch.extend([-1 * numx.ones_like(proto_2)] * p2)
            signal.extend([proto_1] * p1)
            signal.extend([proto_2] * p2)
        samples.append([numx.concatenate((numx.concatenate(switch),
                                          numx.concatenate(signal)), 1)])
    return samples
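# Usage sketch for switching_signals above: each sample is a two-column
# [switch indicator | sine signal] matrix whose length is the total number
# of periods times T.  The frequencies and lengths below are illustrative.
samples = switching_signals(f1=0.01, f2=0.05, T=100, n_switches=3, n_samples=2)
first = samples[0][0]
print(first.shape)   # (total_periods * 100, 2)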
def test_NormalizingRecursiveExpansionNode():
    """Essentially testing the domain transformation."""
    degree = 10
    episodes = 5
    num_obs = 500
    num_vars = 4
    for func_name in recfs:
        x = np.zeros((0, num_vars))
        expn = NormalizingRecursiveExpansionNode(degree, recf=func_name,
                                                 check=True, with0=True)
        for i in range(episodes):
            chunk = (np.random.rand(num_obs, num_vars) - 0.5) * 1000
            expn.train(chunk)
            x = np.concatenate((x, chunk), axis=0)
        expn.stop_training()
        expn.execute(x)
def execute(self, iterable, nodenr=None):
    """Process the data through all nodes in the flow.

    'iterable' is an iterable or iterator (note that a list is also an
    iterable), which returns data arrays that are used as input to the
    flow. Alternatively, one can specify one data array as input.

    If 'nodenr' is specified, the flow is executed only up to
    node nr. 'nodenr'. This is equivalent to 'flow[:nodenr+1](iterable)'.
    """
    if isinstance(iterable, numx.ndarray):
        return self._execute_seq(iterable, nodenr)
    res = []
    empty_iterator = True
    for x in iterable:
        empty_iterator = False
        res.append(self._execute_seq(x, nodenr))
    if empty_iterator:
        errstr = "The execute data iterator is empty."
        raise FlowException(errstr)
    return numx.concatenate(res)
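# Flow.execute (and inverse, above) accept either a single array or an
# iterable of arrays and concatenate the per-chunk results.  A minimal
# sketch with standard mdp nodes; data and dimensions are illustrative only.
import numpy as np
import mdp

x = np.random.random((200, 10))
flow = mdp.Flow([mdp.nodes.PCANode(output_dim=5), mdp.nodes.WhiteningNode()])
flow.train(x)                           # a single array trains every node
y = flow.execute([x[:100], x[100:]])    # chunks are concatenated -> (200, 5)
x_back = flow.inverse(y)                # approximate reconstruction, (200, 10)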
def energy(self, v, h, l):
    """Compute the energy of the RBM given observed variables state `v`
    and `l`, and hidden variables state `h`.

    :param v: A binary matrix having different variables on different
        columns and observations on the rows.
    :type v: numpy.ndarray

    :param l: The labels. A binary matrix having different variables on
        different columns and observations on the rows.
        Only one value per row should be 1.
    :type l: numpy.ndarray

    :param h: The hidden variable state h.
    :type h: numpy.ndarray

    :return: The energy of the RBM given observed and the hidden variables.
    :rtype: float
    """
    x = numx.concatenate((v, l), axis=1)
    return self._energy(x, h)
def sample_h(self, v, l):
    """Sample the hidden variables given observations `v` and labels `l`.

    :param v: A binary matrix having different variables on different
        columns and observations on the rows.
    :type v: numpy.ndarray

    :param l: The labels. A binary matrix having different variables on
        different columns and observations on the rows.
        Only one value per row should be 1.
    :type l: numpy.ndarray

    :returns: A tuple ``(prob_h, h)``, where ``prob_h[n,i]`` is the
        probability that variable ``i`` is one given the observations
        ``v[n,:]`` and the labels ``l[n,:]``, and ``h[n,i]`` is a sample
        from the posterior probability.
    :rtype: tuple
    """
    x = numx.concatenate((v, l), axis=1)
    self._pre_execution_checks(x)
    return self._sample_h(x)
def _params(self):
    return numx.concatenate((self.w.ravel(), self.bh.ravel()))
def _stop_training(self, *args, **kwargs):
    """Concatenate the collected data in a single array."""
    for field in self._cumulator_fields:
        data = getattr(self, field)
        setattr(self, field, numx.concatenate(data, 0))
def _stop_training(self):
    Cumulator._stop_training(self)

    k = self.k
    M = self.data
    N = M.shape[0]

    if k > N:
        err = ('k=%i must be less than'
               ' or equal to number of training points N=%i' % (k, N))
        raise TrainingException(err)

    if self.verbose:
        print('performing HLLE on %i points in %i dimensions...' % M.shape)

    # determines number of output dimensions: if desired_variance
    # is specified, we need to learn it from the data. Otherwise,
    # it's easy
    learn_outdim = False
    if self.output_dim is None:
        if self.desired_variance is None:
            self.output_dim = self.input_dim
        else:
            learn_outdim = True

    # determine number of output dims, precalculate useful stuff
    if learn_outdim:
        Qs, sig2s, nbrss = self._adjust_output_dim()

    d_out = self.output_dim
    # dp = d_out + (d_out-1) + (d_out-2) + ...
    # integer division: dp is used below as an array dimension
    dp = d_out * (d_out + 1) // 2

    if min(k, N) <= d_out:
        err = ('k=%i and n=%i (number of input data points) must be'
               ' larger than output_dim=%i' % (k, N, d_out))
        raise TrainingException(err)

    if k < 1 + d_out + dp:
        wrn = ('The number of neighbours, k=%i, is smaller than'
               ' 1 + output_dim + output_dim*(output_dim+1)/2 = %i,'
               ' which might result in unstable results.'
               % (k, 1 + d_out + dp))
        _warnings.warn(wrn, MDPWarning)

    # build the weight matrix
    # XXX for faster implementation, W should be a sparse matrix
    W = numx.zeros((N, dp * N), dtype=self.dtype)

    if self.verbose:
        print(' - constructing [%i x %i] weight matrix...' % W.shape)

    for row in range(N):
        if learn_outdim:
            nbrs = nbrss[row, :]
        else:
            # -----------------------------------------------
            #  find k nearest neighbors
            # -----------------------------------------------
            M_Mi = M - M[row]
            nbrs = numx.argsort((M_Mi ** 2).sum(1))[1:k + 1]

        # -----------------------------------------------
        #  center the neighborhood using the mean
        # -----------------------------------------------
        nbrhd = M[nbrs]  # this makes a copy
        nbrhd -= nbrhd.mean(0)

        # -----------------------------------------------
        #  compute local coordinates
        #   using a singular value decomposition
        # -----------------------------------------------
        U, sig, VT = svd(nbrhd)
        nbrhd = U.T[:d_out]
        del VT

        # -----------------------------------------------
        #  build Hessian estimator
        # -----------------------------------------------
        Yi = numx.zeros((dp, k), dtype=self.dtype)
        ct = 0
        for i in range(d_out):
            Yi[ct:ct + d_out - i, :] = nbrhd[i] * nbrhd[i:, :]
            ct += d_out - i
        Yi = numx.concatenate([numx.ones((1, k), dtype=self.dtype),
                               nbrhd, Yi], 0)

        # -----------------------------------------------
        #  orthogonalize linear and quadratic forms
        #   with QR factorization
        #   and make the weights sum to 1
        # -----------------------------------------------
        if k >= 1 + d_out + dp:
            Q, R = numx_linalg.qr(Yi.T)
            w = Q[:, d_out + 1:d_out + 1 + dp]
        else:
            q, r = _mgs(Yi.T)
            w = q[:, -dp:]

        S = w.sum(0)  # sum along columns
        # if S[i] is too small, set it equal to 1.0
        # this prevents weights from blowing up
        S[numx.where(numx.absolute(S) < 1E-4)] = 1.0
        # print w.shape, S.shape, (w/S).shape
        # print W[nbrs, row*dp:(row+1)*dp].shape
        W[nbrs, row * dp:(row + 1) * dp] = w / S

    # -----------------------------------------------
    # To find the null space, we want the
    #  first d+1 eigenvectors of W.T*W
    # Compute this using an svd of W
    # -----------------------------------------------
    if self.verbose:
        msg = (' - finding [%i x %i] '
               'null space of weight matrix...' % (d_out, N))
        print(msg)

    # XXX future work:
    # XXX  use of upcoming ARPACK interface for bottom few eigenvectors
    # XXX   of a sparse matrix will significantly increase the speed
    # XXX   of the next step
    if self.svd:
        sig, U = nongeneral_svd(W.T, range=(2, d_out + 1))
        Y = U * numx.sqrt(N)
    else:
        WW = mult(W, W.T)
        # regularizes the eigenvalues, does not change the eigenvectors:
        W_diag_idx = numx.arange(N)
        WW[W_diag_idx, W_diag_idx] += 0.01
        sig, U = symeig(WW, range=(2, self.output_dim + 1), overwrite=True)
        Y = U * numx.sqrt(N)
        del WW
    del W

    # -----------------------------------------------
    # Normalize Y
    #
    # Alternative way to do it:
    #  we need R = (Y.T*Y)^(-1/2)
    #   do this with an SVD of Y:
    #      Y = U*sig*V.T
    #      Y.T*Y = (V*sig.T*U.T) * (U*sig*V.T)
    #            = V * (sig*sig.T) * V.T
    #            = V * sig^2 * V.T
    #   so
    #      R = V * sig^-1 * V.T
    # The code is:
    #    U, sig, VT = svd(Y)
    #    del U
    #    S = numx.diag(sig**-1)
    #    self.training_projection = mult(Y, mult(VT.T, mult(S, VT)))
    # -----------------------------------------------
    if self.verbose:
        print(' - normalizing null space...')

    C = sqrtm(mult(Y.T, Y))
    self.training_projection = mult(Y, C)
def makeSimplex(tensor):
    simplex = np.concatenate((tensor[:, 0, 0, 0], tensor[0, 1:, 0, 0],
                              tensor[0, 0, 1:, 0], tensor[0, 0, 0, 1:]))
    x1 = tensor[1, 0, 0, 0]
    x2 = tensor[2, 0, 0, 0]
    x3 = tensor[3, 0, 0, 0]
    y1 = tensor[0, 1, 0, 0]
    y2 = tensor[0, 2, 0, 0]
    y3 = tensor[0, 3, 0, 0]
    z1 = tensor[0, 0, 1, 0]
    z2 = tensor[0, 0, 2, 0]
    z3 = tensor[0, 0, 3, 0]
    w1 = tensor[0, 0, 0, 1]
    w2 = tensor[0, 0, 0, 2]
    w3 = tensor[0, 0, 0, 3]
    simplex = np.concatenate((simplex, x1 * np.array([y1, y2, y3])))
    simplex = np.concatenate((simplex, x2 * np.array([y1, y2])))
    simplex = np.concatenate((simplex, np.array([x3 * y1])))
    simplex = np.concatenate((simplex, x1 * np.array([z1, z2, z3])))
    simplex = np.concatenate((simplex, x2 * np.array([z1, z2])))
    simplex = np.concatenate((simplex, x3 * np.array([z1])))
    simplex = np.concatenate((simplex, y1 * np.array([z1, z2, z3])))
    simplex = np.concatenate((simplex, y2 * np.array([z1, z2])))
    simplex = np.concatenate((simplex, y3 * np.array([z1])))
    simplex = np.concatenate((simplex, x1 * np.array([w1, w2, w3])))
    simplex = np.concatenate((simplex, x2 * np.array([w1, w2])))
    simplex = np.concatenate((simplex, x3 * np.array([w1])))
    simplex = np.concatenate((simplex, y1 * np.array([w1, w2, w3])))
    simplex = np.concatenate((simplex, y2 * np.array([w1, w2])))
    simplex = np.concatenate((simplex, y3 * np.array([w1])))
    simplex = np.concatenate((simplex, z1 * np.array([w1, w2, w3])))
    simplex = np.concatenate((simplex, z2 * np.array([w1, w2])))
    simplex = np.concatenate((simplex, z3 * np.array([w1])))
    simplex = np.concatenate((simplex, x1 * np.array([y1 * z1, y1 * z2])))
    simplex = np.concatenate((simplex, x2 * np.array([y1 * z1])))
    simplex = np.concatenate((simplex, x1 * np.array([y2 * z1])))
    simplex = np.concatenate((simplex, x1 * np.array([y1 * w1, y1 * w2])))
    simplex = np.concatenate((simplex, x2 * np.array([y1 * w1])))
    simplex = np.concatenate((simplex, x1 * np.array([y2 * w1])))
    simplex = np.concatenate((simplex, x1 * np.array([z1 * w1, z1 * w2])))
    simplex = np.concatenate((simplex, x2 * np.array([z1 * w1])))
    simplex = np.concatenate((simplex, y1 * np.array([z1 * w1, z1 * w2])))
    simplex = np.concatenate((simplex, y2 * np.array([z1 * w1])))
    simplex = np.concatenate((simplex, x1 * np.array([z2 * w1])))
    simplex = np.concatenate((simplex, y1 * np.array([z2 * w1])))
    simplex = np.concatenate((simplex, x1 * np.array([y1 * z1 * w1])))
    return simplex
def _params(self):
    return numx.concatenate((self.w_in.ravel(), self.w_bias.ravel(),
                             self.w.ravel()))
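# _params above packs several weight arrays into one flat vector (e.g. for a
# generic optimizer or a gradient check).  A minimal sketch of the inverse
# operation, assuming the original shapes are known; all names and values
# here are illustrative only.
import numpy as np

w_in = np.arange(6.0).reshape(2, 3)
w_bias = np.arange(3.0)
w = np.arange(9.0).reshape(3, 3)

flat = np.concatenate((w_in.ravel(), w_bias.ravel(), w.ravel()))

# unpack by slicing the flat vector back into the original shapes
sizes = [w_in.size, w_bias.size, w.size]
offsets = np.cumsum([0] + sizes)
w_in2 = flat[offsets[0]:offsets[1]].reshape(w_in.shape)
w_bias2 = flat[offsets[1]:offsets[2]].reshape(w_bias.shape)
w2 = flat[offsets[2]:offsets[3]].reshape(w.shape)
assert np.array_equal(w_in2, w_in) and np.array_equal(w2, w)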