def test_gen_even_slices():
    # check that gen_even_slices contains all samples
    some_range = range(10)
    joined_range = list(chain(*[some_range[slice] for slice in
                                gen_even_slices(10, 3)]))
    assert_array_equal(some_range, joined_range)

    # check that passing negative n_chunks raises an error
    slices = gen_even_slices(10, -1)
    assert_raises_regex(ValueError, "gen_even_slices got n_packs=-1, must be"
                        " >=1", next, slices)
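# For reference, a minimal sketch (not part of the test above) of what
# gen_even_slices yields: n items are split into n_packs contiguous slices
# whose sizes differ by at most one.
from sklearn.utils import gen_even_slices

print(list(gen_even_slices(10, 3)))
# [slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]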
def _mean_of_squares(signals, n_batches=20):
    """Compute mean of squares for each signal.
    This function is equivalent to

        var = np.copy(signals)
        var **= 2
        var = var.mean(axis=0)

    but uses a lot less memory.

    Parameters
    ----------
    signals : numpy.ndarray, shape (n_samples, n_features)
        signal whose mean of squares must be computed.

    n_batches : int, optional
        number of batches to use in the computation. Tweaking this value
        can lead to variation of memory usage and computation time. The
        higher the value, the lower the memory consumption.
    """
    # No batching for small arrays
    if signals.shape[1] < 500:
        n_batches = 1

    # Fastest for C order
    var = np.empty(signals.shape[1])
    for batch in gen_even_slices(signals.shape[1], n_batches):
        tvar = np.copy(signals[:, batch])
        tvar **= 2
        var[batch] = tvar.mean(axis=0)

    return var
def transform(self, X, y=None):
    """Calculate the entropy of each array in `X`.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_points, d)
        Input data.

    y : None
        There is no need of a target in a transformer, yet the pipeline
        API requires this parameter.

    Returns
    -------
    Xt : ndarray of int, shape (n_samples, n_points)
        Array of entropies (one per array in `X`).

    """
    # Check if fit had been called
    check_is_fitted(self, ['_is_fitted'])
    X = check_array(X, allow_nd=True)

    Xt = Parallel(n_jobs=self.n_jobs)(
        delayed(self._permutation_entropy)(X[s])
        for s in gen_even_slices(len(X), effective_n_jobs(self.n_jobs)))
    Xt = np.concatenate(Xt)
    return Xt
def transform(self, X, y=None): """Compute derivatives of multi-channel curves. Parameters ---------- X : ndarray of shape (n_samples, n_channels, n_bins) Input collection of multi-channel curves. y : None There is no need for a target in a transformer, yet the pipeline API requires this parameter. Returns ------- Xt : ndarray of shape (n_samples, n_channels, n_bins - order) Output collection of multi-channel curves given by taking discrete differences of order `order` in each channel in the curves in `X`. """ check_is_fitted(self) Xt = check_array(X, ensure_2d=False, allow_nd=True) if Xt.ndim != 3: raise ValueError("Input must be 3-dimensional.") Xt = Parallel(n_jobs=self.n_jobs)( delayed(np.diff)(Xt[s], n=self.order, axis=-1) for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) return Xt
def transform(self, X, y=None): """For each greyscale image in the collection `X`, calculate a corresponding binary image by applying the `threshold`. Return the collection of binary images. Parameters ---------- X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) Input data. Each entry along axis 0 is interpreted as a 2D or 3D greyscale image. y : None There is no need of a target in a transformer, yet the pipeline API requires this parameter. Returns ------- Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \ [, n_pixels_z]) Transformed collection of images. Each entry along axis 0 is a 2D or 3D binary image. """ check_is_fitted(self) Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)(delayed( self._binarize)(Xt[s]) for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) if self.n_dimensions_ == 2: Xt = Xt.reshape(X.shape) return Xt
def transform(self, X, y=None): """For each binary image in the collection `X`, calculate a corresponding grayscale image based on the distance of its pixels to the center. Return the collection of grayscale images. Parameters ---------- X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) Input data. Each entry along axis 0 is interpreted as a 2D or 3D binary image. y : None There is no need of a target in a transformer, yet the pipeline API requires this parameter. Returns ------- Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) Transformed collection of images. Each entry along axis 0 is a 2D or 3D grayscale image. """ check_is_fitted(self) Xt = check_array(X, ensure_2d=False, allow_nd=True, copy=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._calculate_radial)(X[s]) for s in gen_even_slices( Xt.shape[0], effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) return Xt
def transform(self, X, y=None): """For each binary image in the collection `X`, adds a padding. Return the collection of padded binary images. Parameters ---------- X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) Input data. Each entry along axis 0 is interpreted as a 2D or 3D image. y : None There is no need of a target in a transformer, yet the pipeline API requires this parameter. Returns ------- Xt : ndarray of shape (n_samples, n_pixels_x + 2 * padding_x, \ n_pixels_y + 2 * padding_y [, n_pixels_z + 2 * padding_z]) Transformed collection of images. Each entry along axis 0 is a 2D or 3D binary image. """ check_is_fitted(self) Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)(delayed( np.pad)(Xt[s], pad_width=self._pad_width, constant_values=self.value) for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) return Xt
def _parallel_pairwise(X1, X2, metric, metric_params, homology_dimensions, n_jobs): metric_func = implemented_metric_recipes[metric] effective_metric_params = metric_params.copy() none_dict = {dim: None for dim in homology_dimensions} samplings = effective_metric_params.pop("samplings", none_dict) step_sizes = effective_metric_params.pop("step_sizes", none_dict) if metric in ["heat", "persistence_image"]: parallel_kwargs = {"mmap_mode": "c"} else: parallel_kwargs = {} n_columns = len(X2) distance_matrices = Parallel(n_jobs=n_jobs, **parallel_kwargs)( delayed(metric_func)(_subdiagrams(X1, [dim], remove_dim=True), _subdiagrams(X2[s], [dim], remove_dim=True), sampling=samplings[dim], step_size=step_sizes[dim], **effective_metric_params) for dim in homology_dimensions for s in gen_even_slices(n_columns, effective_n_jobs(n_jobs))) distance_matrices = np.concatenate(distance_matrices, axis=1) distance_matrices = np.stack([ distance_matrices[:, i * n_columns:(i + 1) * n_columns] for i in range(len(homology_dimensions)) ], axis=2) return distance_matrices
def _e_step(self, X, cal_sstats, random_init, parallel=None): """E-step in EM update. Parameters ---------- X : array-like or sparse matrix, shape=(n_samples, n_features) Document word matrix. cal_sstats : boolean Parameter that indicate whether to calculate sufficient statistics or not. Set ``cal_sstats`` to True when we need to run M-step. random_init : boolean Parameter that indicate whether to initialize document topic distribution randomly in the E-step. Set it to True in training steps. parallel : joblib.Parallel (optional) Pre-initialized instance of joblib.Parallel. Returns ------- (doc_topic_distr, suff_stats) : `doc_topic_distr` is unnormalized topic distribution for each document. In the literature, this is called `gamma`. `suff_stats` is expected sufficient statistics for the M-step. When `cal_sstats == False`, it will be None. """ # Run e-step in parallel random_state = self.random_state_ if random_init else None # TODO: make Parallel._effective_n_jobs public instead? n_jobs = _get_n_jobs(self.n_jobs) if parallel is None: parallel = Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1)) results = parallel( delayed(_update_doc_distribution) (X[idx_slice, :], self.exp_dirichlet_component_, self.doc_topic_prior_, self.max_doc_update_iter, self.mean_change_tol, cal_sstats, random_state) for idx_slice in gen_even_slices(X.shape[0], n_jobs)) # merge result doc_topics, sstats_list = zip(*results) doc_topic_distr = np.vstack(doc_topics) if cal_sstats: # This step finishes computing the sufficient statistics for the # M-step. suff_stats = np.zeros(self.components_.shape) for sstats in sstats_list: suff_stats += sstats suff_stats *= self.exp_dirichlet_component_ else: suff_stats = None return (doc_topic_distr, suff_stats)
def train(self, X):
    n_samples, n_features = X.shape
    n_batches = int(np.ceil(float(n_samples) / self.batchsize))

    for i in range(self.numepochs):
        # shuffle the samples at the start of each epoch
        kk = np.random.permutation(n_samples)
        X_shuffled = X[kk]
        err = 0
        for l in gen_even_slices(n_samples, n_batches):
            batch = X_shuffled[l, :]
            v1 = batch                                   # n_samples X n_visible
            h1 = sigmrnd(np.dot(v1, self.W.T) + self.c)  # n_samples X n_hidden
            v2 = sigmrnd(np.dot(h1, self.W) + self.b)    # n_samples X n_visible
            h2 = sigm(np.dot(v2, self.W.T) + self.c)     # n_samples X n_hidden

            c1 = np.dot(h1.T, v1)                        # n_hidden X n_visible
            c2 = np.dot(h2.T, v2)                        # n_hidden X n_visible

            self.vW = self.momentum * self.vW + self.alpha * (c1 - c2) / self.batchsize                # n_hidden X n_visible
            self.vb = self.momentum * self.vb + self.alpha * np.sum(v1 - v2, axis=0) / self.batchsize  # n_visible X 1
            self.vc = self.momentum * self.vc + self.alpha * np.sum(h1 - h2, axis=0) / self.batchsize  # n_hidden X 1

            self.W = self.W + self.vW                    # n_hidden X n_visible
            self.b = self.b + self.vb                    # n_visible X 1
            self.c = self.c + self.vc                    # n_hidden X 1

            err = err + np.sum(np.power(v1 - v2, 2)) / self.batchsize

        print('epoch ' + str(i) + '/' + str(self.numepochs) +
              '. Average reconstruction error is: ' + str(err / n_batches))
def transform(self, X, y=None): """For each binary image in the collection `X`, calculate its negation. Return the collection of negated binary images. Parameters ---------- X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) Input data. Each entry along axis 0 is interpreted as a 2D or 3D binary image. y : None There is no need of a target in a transformer, yet the pipeline API requires this parameter. Returns ------- Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \ [, n_pixels_z]) Transformed collection of images. Each entry along axis 0 is a 2D or 3D binary image. """ check_is_fitted(self) Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)(delayed( self._invert)(Xt[s]) for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) return Xt
def transform(self, X, y=None): """For each collection of binary images, calculate the corresponding collection of point clouds based on the coordinates of activated pixels. Parameters ---------- X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) Input data. Each entry along axis 0 is interpreted as a 2D or 3D binary image. y : None There is no need of a target in a transformer, yet the pipeline API requires this parameter. Returns ------- Xt : ndarray of shape (n_samples, n_pixels_x * n_pixels_y [* \ n_pixels_z], n_dimensions) Transformed collection of images. Each entry along axis 0 is a point cloud in ``n_dimensions``-dimensional space. """ check_is_fitted(self) Xt = check_array(X, allow_nd=True) Xt = np.swapaxes(np.flip(Xt, axis=1), 1, 2) Xt = Parallel(n_jobs=self.n_jobs)(delayed( self._embed)(Xt[s]) for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = reduce(iconcat, Xt, []) return Xt
def _parallel_learning(self, X, Y, w): n_samples = len(X) objective, positive_slacks = 0, 0 verbose = max(0, self.verbose - 3) if self.batch_size is not None: raise ValueError("If n_jobs != 1, batch_size needs to" "be None") # generate batches of size n_jobs # to speed up inference if self.n_jobs == -1: n_jobs = cpu_count() else: n_jobs = self.n_jobs n_batches = int(np.ceil(float(len(X)) / n_jobs)) slices = gen_even_slices(n_samples, n_batches) for batch in slices: X_b = X[batch] Y_b = Y[batch] candidate_constraints = Parallel(n_jobs=self.n_jobs, verbose=verbose)( delayed(find_constraint)(self.model, x, y, w) for x, y in zip(X_b, Y_b) ) dpsi = np.zeros(self.model.size_psi) for x, y, constraint in zip(X_b, Y_b, candidate_constraints): y_hat, delta_psi, slack, loss = constraint if slack > 0: objective += slack dpsi += delta_psi positive_slacks += 1 w = self._solve_subgradient(dpsi, n_samples, w) return objective, positive_slacks, w
def transform(self, X, y=None): """Compute the persistence entropies of diagrams in `X`. Parameters ---------- X : ndarray, shape (n_samples, n_features, 3) Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). y : None There is no need of a target in a transformer, yet the pipeline API requires this parameter. Returns ------- Xt : ndarray, shape (n_samples, n_homology_dimensions) Persistence entropies: one value per sample and per homology dimension seen in :meth:`fit`. Index i along axis 1 corresponds to the i-th homology dimension in :attr:`homology_dimensions_`. """ # Check if fit had been called check_is_fitted(self, ['_is_fitted']) X = check_diagram(X) with np.errstate(divide='ignore', invalid='ignore'): Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._persistence_entropy)(_subdiagrams(X, [dim])[s]) for dim in self.homology_dimensions_ for s in gen_even_slices( X.shape[0], effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt).reshape((self._n_dimensions, X.shape[0])).T return Xt
def _parallel_learning(self, X, Y, w): n_samples = len(X) objective, positive_slacks = 0, 0 verbose = max(0, self.verbose - 3) if self.batch_size is not None: raise ValueError("If n_jobs != 1, batch_size needs to" "be None") # generate batches of size n_jobs # to speed up inference if self.n_jobs == -1: n_jobs = cpu_count() else: n_jobs = self.n_jobs n_batches = int(np.ceil(float(len(X)) / n_jobs)) slices = gen_even_slices(n_samples, n_batches) for batch in slices: X_b = X[batch] Y_b = Y[batch] candidate_constraints = Parallel( n_jobs=self.n_jobs, verbose=verbose)(delayed(find_constraint)(self.model, x, y, w) for x, y in zip(X_b, Y_b)) djoint_feature = np.zeros(self.model.size_joint_feature) for x, y, constraint in zip(X_b, Y_b, candidate_constraints): y_hat, delta_joint_feature, slack, loss = constraint if slack > 0: objective += slack djoint_feature += delta_joint_feature positive_slacks += 1 w = self._solve_subgradient(djoint_feature, n_samples, w) return objective, positive_slacks, w
def transform(self, X, y=None): """Compute the Betti curves of diagrams in `X`. Parameters ---------- X : ndarray of shape (n_samples, n_features, 3) Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). y : None There is no need for a target in a transformer, yet the pipeline API requires this parameter. Returns ------- Xt : ndarray of shape (n_samples, n_homology_dimensions, n_bins) Betti curves: one curve (represented as a one-dimensional array of integer values) per sample and per homology dimension seen in :meth:`fit`. Index i along axis 1 corresponds to the i-th homology dimension in :attr:`homology_dimensions_`. """ check_is_fitted(self) X = check_diagrams(X) Xt = Parallel(n_jobs=self.n_jobs)( delayed(betti_curves)(_subdiagrams(X, [dim], remove_dim=True)[s], self._samplings[dim]) for dim in self.homology_dimensions_ for s in gen_even_slices( X.shape[0], effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt).\ reshape(self._n_dimensions, X.shape[0], -1).\ transpose((1, 0, 2)) return Xt
def _parallel_pairwise(X1, X2, metric, metric_params, homology_dimensions,
                       n_jobs):
    metric_func = implemented_metric_recipes[metric]
    effective_metric_params = metric_params.copy()
    none_dict = {dim: None for dim in homology_dimensions}
    samplings = effective_metric_params.pop('samplings', none_dict)
    step_sizes = effective_metric_params.pop('step_sizes', none_dict)

    if X2 is None:
        X2 = X1

    distance_matrices = Parallel(n_jobs=n_jobs)(
        delayed(metric_func)(_subdiagrams(X1, [dim], remove_dim=True),
                             _subdiagrams(X2[s], [dim], remove_dim=True),
                             sampling=samplings[dim],
                             step_size=step_sizes[dim],
                             **effective_metric_params)
        for dim in homology_dimensions
        for s in gen_even_slices(X2.shape[0], effective_n_jobs(n_jobs)))

    distance_matrices = np.concatenate(distance_matrices, axis=1)
    distance_matrices = np.stack(
        [distance_matrices[:, i * X2.shape[0]:(i + 1) * X2.shape[0]]
         for i in range(len(homology_dimensions))],
        axis=2)
    return distance_matrices
def transform(self, X, y=None):
    """Calculate the permutation entropy of each two-dimensional array
    in `X`.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_points, n_dimensions)
        Input data.

    y : None
        There is no need for a target in a transformer, yet the pipeline
        API requires this parameter.

    Returns
    -------
    Xt : ndarray of int, shape (n_samples, 1)
        One permutation entropy per entry in `X` along axis 0.

    """
    check_is_fitted(self, '_is_fitted')
    Xt = check_array(X, allow_nd=True)

    Xt = Parallel(n_jobs=self.n_jobs)(
        delayed(self._permutation_entropy)(Xt[s])
        for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs)))
    Xt = np.concatenate(Xt)
    return Xt
def transform(self, X, y=None): """For each binary image in the collection `X`, calculate a corresponding greyscale image based on the distance of its pixels to the hyperplane defined by the `direction` vector and the first seen edge of the images following that `direction`. Return the collection of greyscale images. Parameters ---------- X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) Input data. Each entry along axis 0 is interpreted as a 2D or 3D binary image. y : None There is no need of a target in a transformer, yet the pipeline API requires this parameter. Returns ------- Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) Transformed collection of images. Each entry along axis 0 is a 2D or 3D greyscale image. """ check_is_fitted(self) Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._calculate_height)(X[s]) for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) return Xt
def fit(self, X, y=None):
    """Fit the model to the data X.

    Parameters
    ----------
    X : {array-like, sparse matrix} shape (n_samples, n_features)
        Training data.

    Returns
    -------
    self : BernoulliRBM
        The fitted model.
    """
    X, = check_arrays(X, sparse_format='csr', dtype=np.float)
    n_samples = X.shape[0]
    rng = check_random_state(self.random_state)

    self.components_ = np.asarray(
        rng.normal(0, 0.01, (self.n_components, X.shape[1])),
        order='fortran')
    self.intercept_hidden_ = np.zeros(self.n_components, )
    self.intercept_visible_ = np.zeros(X.shape[1], )
    self.h_samples_ = np.zeros((self.batch_size, self.n_components))

    n_batches = int(np.ceil(float(n_samples) / self.batch_size))
    batch_slices = list(gen_even_slices(n_batches * self.batch_size,
                                        n_batches, n_samples))

    for iteration in xrange(1, self.n_iter + 1):
        for batch_slice in batch_slices:
            self._fit(X[batch_slice], rng)

    return self
def fit(self, x_train, n_hidden): n_train = x_train.shape[0] n_visible = x_train.shape[1] rng = get_rng(self.random_state) self.W = tf.Variable(rng.normal(0, 0.01, size=(n_hidden, n_visible)), dtype='float32') self.b = tf.Variable(tf.zeros(shape=(n_visible, ))) self.c = tf.Variable(tf.zeros(shape=(n_hidden, ))) n_batches = math.ceil(n_train / batch_size) batch_slices = list( gen_even_slices(n_batches * batch_size, n_batches, n_samples=n_train)) h_samples = tf.zeros(shape=(batch_size, n_hidden)) for i in range(n_iter): for batch_slice in batch_slices: v = x_train[batch_slice] h = self.get_hidden(v) v_samples, _ = self.sample_visible(h_samples, rng) h_samples, h_prime = self.sample_hidden(v_samples, rng) dW = tf.matmul(tf.transpose(h), v) - tf.matmul( tf.transpose(h_prime), v_samples) db = tf.reduce_sum(v, axis=0) - tf.reduce_sum(v_samples, axis=0) dc = tf.reduce_sum(h, axis=0) - tf.reduce_sum(h_prime, axis=0) alpha = learning_rate / v.shape[0] self.W.assign_add(alpha * dW) self.b.assign_add(alpha * db) self.c.assign_add(alpha * dc)
def _sequential_learning(self, X, Y, w): n_samples = len(X) objective, positive_slacks = 0, 0 if self.batch_size in [None, 1]: # online learning for x, y in zip(X, Y): y_hat, delta_psi, slack, loss = find_constraint(self.model, x, y, w) objective += slack if slack > 0: positive_slacks += 1 self._solve_subgradient(delta_psi, n_samples, w) else: # mini batch learning if self.batch_size == -1: slices = [slice(0, len(X)), None] else: n_batches = int(np.ceil(float(len(X)) / self.batch_size)) slices = gen_even_slices(n_samples, n_batches) for batch in slices: X_b = X[batch] Y_b = Y[batch] Y_hat = self.model.batch_loss_augmented_inference(X_b, Y_b, w, relaxed=True) delta_psi = self.model.batch_psi(X_b, Y_b) - self.model.batch_psi(X_b, Y_hat) loss = np.sum(self.model.batch_loss(Y_b, Y_hat)) violation = np.maximum(0, loss - np.dot(w, delta_psi)) objective += violation positive_slacks += self.batch_size self._solve_subgradient(delta_psi / len(X_b), n_samples, w) return objective, positive_slacks, w
def fit(self, x_train, n_hidden): n_train = x_train.shape[0] n_visible = x_train.shape[1] rng = get_rng(self.random_state) self.W = rng.normal(0, 0.01, size=(n_hidden, n_visible)) self.b = np.zeros(shape=(n_visible, )) self.c = np.zeros(shape=(n_hidden, )) n_batches = int(np.ceil(n_train / batch_size)) batch_slices = list( gen_even_slices(n_batches * batch_size, n_batches, n_samples=n_train)) h_samples = np.zeros(shape=(batch_size, n_hidden)) for i in range(n_iter): for batch_slice in batch_slices: v = x_train[batch_slice] h = self.get_hidden(v) v_samples, _ = self.sample_visible(h_samples, rng) h_samples, h_prime = self.sample_hidden(v_samples, rng) dW = np.dot(h.T, v) - np.dot(h_prime.T, v_samples) db = v.sum(axis=0) - v_samples.sum(axis=0) dc = h.sum(axis=0) - h_prime.sum(axis=0) alpha = learning_rate / v.shape[0] self.W += (alpha * dW) self.b += (alpha * db) self.c += (alpha * dc)
def parallel_predict(self, X, n_jobs):
    """
    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        The input samples.

    n_jobs : int
        The number of jobs to run in parallel. See sklearn for how to
        use it.

    Returns
    -------
    y : array of shape = [n_samples]
        The predicted target value.
    """
    if n_jobs < 0:
        n_jobs = max(cpu_count() + 1 + n_jobs, 1)
    if n_jobs == 1:
        # Special case without multiprocessing parallelism
        return self.predict(X)

    fd = delayed(self.predict)
    ret = Parallel(n_jobs=n_jobs, verbose=0)(
        fd(X[s]) for s in gen_even_slices(X.shape[0], n_jobs))
    ret = [np.hstack(li) for li in ret]
    return np.hstack(ret)
def _mini_batch_compute(self, n_samples):
    '''
    Compute equal sized minibatches (indexes).
    This method is taken from sklearn/neural_network/rbm.py
    '''
    n_batches = int(np.ceil(float(n_samples) / self.batch_size))
    batch_slices = list(gen_even_slices(n_batches * self.batch_size,
                                        n_batches, n_samples))
    return batch_slices
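# A minimal sketch (illustrative numbers, not taken from the snippet above) of
# the minibatch pattern used here: asking for n_batches * batch_size items but
# capping at n_samples makes every slice batch_size long except possibly the
# last one.
import numpy as np
from sklearn.utils import gen_even_slices

n_samples, batch_size = 10, 4
n_batches = int(np.ceil(float(n_samples) / batch_size))  # 3
print(list(gen_even_slices(n_batches * batch_size, n_batches,
                           n_samples=n_samples)))
# [slice(0, 4, None), slice(4, 8, None), slice(8, 10, None)]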
def fit(self, X, y=None): """ Fit the model to the data X. Parameters ---------- X: array-like, shape (n_samples, n_features) Training data, where n_samples in the number of samples and n_features is the number of features. Returns ------- self """ X = atleast2d_or_csr(X, dtype=np.float64, order="C") n_samples, n_features = X.shape self._init_fit(n_features) self._init_param() self._init_t_eta_() if self.shuffle_data: X, y = shuffle(X, y, random_state=self.random_state) # l-bfgs does not use mini-batches if self.algorithm == 'l-bfgs': batch_size = n_samples else: batch_size = np.clip(self.batch_size, 0, n_samples) n_batches = n_samples / batch_size batch_slices = list( gen_even_slices(n_batches * batch_size, n_batches)) # preallocate memory a_hidden = np.empty((batch_size, self.n_hidden)) a_output = np.empty((batch_size, n_features)) delta_o = np.empty((batch_size, n_features)) if self.algorithm == 'sgd': prev_cost = np.inf for i in xrange(self.max_iter): for batch_slice in batch_slices: cost = self.backprop_sgd(X[batch_slice], n_features, batch_size, delta_o, a_hidden, a_output) if self.verbose: print("Iteration %d, cost = %.2f" % (i, cost)) if abs(cost - prev_cost) < self.tol: break prev_cost = cost self.t_ += 1 elif self.algorithm == 'l-bfgs': self._backprop_lbfgs(X, n_features, a_hidden, a_output, delta_o, n_samples) return self
def fit(self, X, Y): """Fit the model to the data X and target y. Parameters ---------- X : {array-like, sparse matrix}, shape = [n_samples, n_features] Training data, where n_samples in the number of samples and n_features is the number of features. Y : numpy array of shape [n_samples] Subset of the target values. Returns ------- self """ self.n_layers = len(self.n_hidden) X = atleast2d_or_csr(X, dtype=np.float64, order="C") n_outputs = Y.shape[1] n_samples, n_features = X.shape self._init_fit(X, Y, n_features, n_outputs) self._init_param() if self.shuffle_data: X, Y = shuffle(X, Y, random_state=self.random_state) self.batch_size = np.clip(self.batch_size, 0, n_samples) n_batches = n_samples / self.batch_size batch_slices = list( gen_even_slices(n_batches * self.batch_size, n_batches)) # l-bfgs does not work well with batches if self.algorithm == 'l-bfgs': self.batch_size = n_samples # preallocate memory a_hidden = [0] * self.n_layers a_output = np.empty((self.batch_size, n_outputs)) delta_o = np.empty((self.batch_size, n_outputs)) # print 'Fine tuning...' if self.algorithm is 'sgd': eta = self.eta0 t = 1 prev_cost = np.inf for i in xrange(self.max_iter): for batch_slice in batch_slices: cost, eta = self.backprop_sgd(X[batch_slice], Y[batch_slice], self.batch_size, a_hidden, a_output, delta_o, t, eta) if self.verbose: print("Iteration %d, cost = %.2f" % (i, cost)) if abs(cost - prev_cost) < self.tol: break prev_cost = cost t += 1 elif 'l-bfgs': self._backprop_lbfgs(X, Y, n_features, n_outputs, n_samples, a_hidden, a_output, delta_o) return self
def check_gen_even_slices():
    even = csr_matrix((1032, 1030))
    odd = csr_matrix((1033, 1033))
    batch_size = 100

    even_batches = int(np.ceil(float(even.shape[0]) / batch_size))
    odd_batches = int(np.ceil(float(odd.shape[0]) / batch_size))

    odd_slices = list(gen_even_slices(odd_batches * batch_size,
                                      odd_batches, odd.shape[0]))
    even_slices = list(gen_even_slices(even_batches * batch_size,
                                       even_batches, even.shape[0]))

    assert slices_bounds_check(even, even_slices) == "passes", \
        "Fails on even number of rows"
    assert slices_bounds_check(odd, odd_slices) == "passes", \
        "Fails on odd number of rows"
    print("OK")
def get_custom_gram_matrix(metric, X, Y=None, n_jobs=1):
    if Y is None:
        Y = X

    func = partial(custom_cdist, metric=metric)
    fd = delayed(func)
    ret = Parallel(n_jobs=n_jobs, verbose=1)(
        fd(X, Y[s]) for s in gen_even_slices(Y.shape[0], n_jobs))

    rez = np.hstack(ret)
    return rez
def fit(self, data):
    num_examples = data.shape[0]
    self.h_samples_ = np.zeros((self.batch_size, self.num_hidden))

    n_batches = int(np.ceil(float(num_examples) / self.batch_size))
    batch_slices = list(gen_even_slices(n_batches * self.batch_size,
                                        n_batches, num_examples))

    for iteration in xrange(1, self.max_epochs + 1):
        for batch_slice in batch_slices:
            self._fit(data[batch_slice])
def _detrend(signals, inplace=False, type="linear", n_batches=10): """Detrend columns of input array. Signals are supposed to be columns of `signals`. This function is significantly faster than scipy.signal.detrend on this case and uses a lot less memory. Parameters ========== signals : numpy.ndarray This parameter must be two-dimensional. Signals to detrend. A signal is a column. inplace : bool, optional Tells if the computation must be made inplace or not (default False). type : str, optional Detrending type ("linear" or "constant"). See also scipy.signal.detrend. n_batches : int, optional number of batches to use in the computation. Tweaking this value can lead to variation of memory usage and computation time. The higher the value, the lower the memory consumption. Returns ======= detrended_signals: numpy.ndarray Detrended signals. The shape is that of 'signals'. """ if not inplace: signals = signals.copy() signals -= np.mean(signals, axis=0) if type == "linear": # Keeping "signals" dtype avoids some type conversion further down, # and can save a lot of memory if dtype is single-precision. regressor = np.arange(signals.shape[0], dtype=signals.dtype) regressor -= regressor.mean() std = np.sqrt((regressor ** 2).sum()) # avoid numerical problems if not std < np.finfo(np.float).eps: regressor /= std regressor = regressor[:, np.newaxis] # No batching for small arrays if signals.shape[1] < 500: n_batches = 1 # This is fastest for C order. for batch in gen_even_slices(signals.shape[1], n_batches): signals[:, batch] -= np.dot(regressor[:, 0], signals[:, batch] ) * regressor return signals
def load_data(name, partition_id, n_partitions):
    """load partition of data into global var `name`"""
    from sklearn.datasets import fetch_20newsgroups_vectorized
    from sklearn.utils import gen_even_slices

    dataset = fetch_20newsgroups_vectorized('test')
    size = dataset.data.shape[0]
    slices = list(gen_even_slices(size, n_partitions))
    part = dataset.data[slices[partition_id]]

    # put it in globals
    globals().update({name: part})
    return part.shape
def shuffle_audio(audio, chunk_length=0.5, sr=None):
    n_chunks = int((audio.size / sr) / chunk_length)
    if n_chunks in (0, 1):
        return audio

    slices = list(gen_even_slices(audio.size, n_chunks))
    random.shuffle(slices)
    shuffled = np.concatenate([audio[s] for s in slices])
    return shuffled
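# A minimal usage sketch for shuffle_audio above (synthetic signal and an
# assumed sample rate of 16 kHz; the helper itself needs numpy, random and
# gen_even_slices imported in its module).
import numpy as np

sr = 16000
audio = np.sin(2 * np.pi * 440 * np.arange(2 * sr) / sr)  # 2 s of a 440 Hz tone
shuffled = shuffle_audio(audio, chunk_length=0.5, sr=sr)  # 4 half-second chunks, reordered
assert shuffled.shape == audio.shape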
def _parallel_inner_prod(X, Y, func, n_jobs, **kwds):
    """Break the pairwise matrix in n_jobs even slices and compute them in
    parallel"""
    if n_jobs < 0:
        n_jobs = max(cpu_count() + 1 + n_jobs, 1)

    if Y is None:
        Y = X

    ret = Parallel(n_jobs=n_jobs, verbose=0)(
        delayed(func)(X[s], Y, **kwds)
        for s in gen_even_slices(len(X), n_jobs))

    return np.hstack(ret)
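# The same row-splitting pattern, sketched with a toy kernel so it runs on its
# own (np.dot stands in for whatever pairwise function `func` is in practice;
# the blocks are reassembled with vstack because rows of X were split).
import numpy as np
from joblib import Parallel, delayed
from sklearn.utils import gen_even_slices

X = np.random.rand(8, 3)
blocks = Parallel(n_jobs=2)(
    delayed(np.dot)(X[s], X.T) for s in gen_even_slices(len(X), 2))
gram = np.vstack(blocks)  # (8, 8) Gram matrix assembled from row blocks
assert np.allclose(gram, X @ X.T)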
def _e_step(self, X, cal_sstats, cal_doc_distr, cal_likelihood, parallel=None): if parallel: n_jobs = parallel.n_jobs results = parallel(delayed( _update_var_local_params)(X[idx_slice, :], self.elog_beta_, self.elog_v_stick_, self.n_doc_truncate, self.alpha, self.max_doc_update_iter, self.mean_change_tol, cal_sstats, cal_doc_distr, self.burn_in_iters, self.check_doc_likelihood, cal_likelihood) for idx_slice in gen_even_slices(X.shape[0], n_jobs)) doc_topics, sstats_list, ll_list = zip(*results) doc_topic_distr = np.vstack(doc_topics) if cal_doc_distr else None doc_likelihood = np.sum(ll_list) if cal_likelihood else None sstats = None if cal_sstats: lambda_sstats = np.zeros(self.lambda_.shape) v_stick_sstats = np.zeros((self.n_topic_truncate, )) for sstats in sstats_list: lambda_sstats += sstats['lambda'] v_stick_sstats += sstats['v_stick'] sstats = { 'lambda': lambda_sstats, 'v_stick': v_stick_sstats, } else: doc_topic_distr, sstats, doc_likelihood = \ _update_var_local_params(X, self.elog_beta_, self.elog_v_stick_, self.n_doc_truncate, self.alpha, self.max_doc_update_iter, self.mean_change_tol, cal_sstats, cal_doc_distr, self.burn_in_iters, self.check_doc_likelihood, cal_likelihood) return (doc_topic_distr, sstats, doc_likelihood)
def fit(self, X): """Fit SGVB to the data Parameters ---------- X : array-like, shape (N, n_features) The data that the SGVB needs to fit on Returns ------- list_lowerbound : list of int list of lowerbound over time """ X, = check_arrays(X, sparse_format='csr', dtype=np.float) [N, dimX] = X.shape rng = check_random_state(self.random_state) self._initParams(dimX, rng) list_lowerbound = np.array([]) n_batches = int(np.ceil(float(N) / self.batch_size)) batch_slices = list(gen_even_slices(n_batches * self.batch_size, n_batches, N)) if self.verbose: print "Initializing gradients for AdaGrad" for i in xrange(10): self._initH(X[batch_slices[i]], rng) begin = time.time() for iteration in xrange(1, self.n_iter + 1): iteration_lowerbound = 0 for batch_slice in batch_slices: lowerbound = self._updateParams(X[batch_slice], N, rng) iteration_lowerbound += lowerbound if self.verbose: end = time.time() print("[%s] Iteration %d, lower bound = %.2f," " time = %.2fs" % (self.__class__.__name__, iteration, iteration_lowerbound / N, end - begin)) begin = end list_lowerbound = np.append( list_lowerbound, iteration_lowerbound / N) return list_lowerbound
def fit(self, X, y=None): """Fit the model to the data X. Parameters ---------- X : array-like, shape (n_samples, n_features) Training data. Returns ------- self : BernoulliRBM The fitted model. """ X, = check_arrays(X, sparse_format='csc', dtype=np.float) n_samples = X.shape[0] rng = check_random_state(self.random_state) self.components_ = np.asarray( rng.normal(0, 0.01, (self.n_components, X.shape[1])), order='fortran') self.intercept_hidden_ = np.zeros(self.n_components, ) self.intercept_visible_ = np.zeros(X.shape[1], ) self.h_samples_ = np.zeros((self.batch_size, self.n_components)) n_batches = int(np.ceil(float(n_samples) / self.batch_size)) batch_slices = list(gen_even_slices(n_batches * self.batch_size, n_batches)) verbose = self.verbose for iteration in xrange(self.n_iter): pl = 0. if verbose: begin = time.time() for batch_slice in batch_slices: pl_batch = self._fit(X[batch_slice], rng) if verbose: pl += pl_batch.sum() if verbose: pl /= n_samples end = time.time() print("Iteration %d, pseudo-likelihood = %.2f, time = %.2fs" % (iteration, pl, end - begin)) return self
def fit(self, X, y, max_epochs, shuffle_data, staged_sample=None, verbose=0): # get all sizes n_samples, n_features = X.shape if y.shape[0] != n_samples: raise ValueError("Shapes of X and y don't fit.") self.n_outs = y.shape[1] # n_batches = int(np.ceil(float(n_samples) / self.batch_size)) n_batches = n_samples / self.batch_size if n_samples % self.batch_size != 0: warnings.warn("Discarding some samples: \ sample size not divisible by chunk size.") n_iterations = int(max_epochs * n_batches) if shuffle_data: X, y = shuffle(X, y) # generate batch slices batch_slices = list( gen_even_slices(n_batches * self.batch_size, n_batches)) # generate weights. # TODO: smart initialization self.weights1_ = np.random.uniform( size=(n_features, self.n_hidden)) / np.sqrt(n_features) self.bias1_ = np.zeros(self.n_hidden) self.weights2_ = np.random.uniform( size=(self.n_hidden, self.n_outs)) / np.sqrt(self.n_hidden) self.bias2_ = np.zeros(self.n_outs) # preallocate memory x_hidden = np.empty((self.batch_size, self.n_hidden)) delta_h = np.empty((self.batch_size, self.n_hidden)) x_output = np.empty((self.batch_size, self.n_outs)) delta_o = np.empty((self.batch_size, self.n_outs)) self.oo_score = [] # main loop for i, batch_slice in izip(xrange(n_iterations), cycle(batch_slices)): self._forward(i, X, batch_slice, x_hidden, x_output, testing=False) self._backward( i, X, y, batch_slice, x_hidden, x_output, delta_o, delta_h) if staged_sample is not None: self.oo_score.append(self.predict(staged_sample)) return self
def fit(self, X, y=None): """Fit the model to the data X. Parameters ---------- X : {array-like, sparse matrix} shape (n_samples, n_features) Training data. Returns ------- self : BernoulliRBM The fitted model. """ X = check_array(X, accept_sparse='csr', dtype=np.float64) n_samples = X.shape[0] rng = check_random_state(self.random_state) self.components_ = np.asarray( rng.normal(0, 0.01, (self.n_components, X.shape[1])), order='fortran') self.intercept_hidden_ = np.zeros(self.n_components, ) self.intercept_visible_ = np.zeros(X.shape[1], ) self.h_samples_ = np.zeros((self.batch_size, self.n_components)) n_batches = int(np.ceil(float(n_samples) / self.batch_size)) batch_slices = list(gen_even_slices(n_batches * self.batch_size, n_batches, n_samples)) verbose = self.verbose begin = time.time() for iteration in xrange(1, self.n_iter + 1): for batch_slice in batch_slices: self._fit(X[batch_slice], rng) if verbose: end = time.time() print("[%s] Iteration %d, pseudo-likelihood = %.2f," " time = %.2fs" % (type(self).__name__, iteration, self.score_samples(X).mean(), end - begin)) begin = end return self
def fit(self, X, y, max_epochs, shuffle_data, verbose=0): # get all sizes n_samples, n_features = X.shape if y.shape[0] != n_samples: raise ValueError("Shapes of X and y don't fit.") self.n_outs = y.shape[1] #n_batches = int(np.ceil(float(n_samples) / self.batch_size)) n_batches = n_samples / self.batch_size if n_samples % self.batch_size != 0: warnings.warn("Discarding some samples: \ sample size not divisible by chunk size.") n_iterations = int(max_epochs * n_batches) if shuffle_data: X, y = shuffle(X, y) # generate batch slices batch_slices = list(gen_even_slices(n_batches * self.batch_size, n_batches)) # generate weights. unif_param = np.sqrt(6) / np.sqrt(n_features+self.n_hidden) #as per Bengio & Glorot, AIStats 2010 self.weights1_ = np.random.uniform(low=-unif_param, high=unif_param, size=(n_features, self.n_hidden)) #self.weights1_ = np.random.uniform(size=(n_features, self.n_hidden))/np.sqrt(n_features) self.bias1_ = np.zeros(self.n_hidden) unif_param = np.sqrt(6) / np.sqrt(self.n_hidden+self.n_outs) #as per Bengio & Glorot, AIStats 2010 self.weights2_ = np.random.uniform(low=-unif_param, high=unif_param, size=(self.n_hidden, self.n_outs)) #self.weights2_ = np.random.uniform(size=(self.n_hidden, self.n_outs))/np.sqrt(self.n_hidden) self.bias2_ = np.zeros(self.n_outs) # preallocate memory x_hidden = np.empty((self.batch_size, self.n_hidden)) delta_h = np.empty((self.batch_size, self.n_hidden)) x_output = np.empty((self.batch_size, self.n_outs)) delta_o = np.empty((self.batch_size, self.n_outs)) # main loop for i, batch_slice in izip(xrange(n_iterations), cycle(batch_slices)): self._forward(i, X, batch_slice, x_hidden, x_output) self._backward(i, X, y, batch_slice, x_hidden, x_output, delta_o, delta_h) return self
def _e_step(self, X, cal_delta): """ E-step set `cal_delta == True` when we need to run _m_step for inference, set it to False """ # parell run e-step if self.n_jobs == -1: n_jobs = cpu_count() else: n_jobs = self.n_jobs results = Parallel(n_jobs=n_jobs, verbose=self.verbose)( delayed(_update_gamma) (X[idx_slice, :], self.expElogbeta, self.alpha, self.rng, 100, self.mean_change_tol, cal_delta) for idx_slice in gen_even_slices(X.shape[0], n_jobs)) # merge result gammas, deltas = zip(*results) gamma = np.vstack(gammas) if cal_delta: # This step finishes computing the sufficient statistics for the # M step, so that # sstats[k, w] = \sum_d n_{dw} * phi_{dwk} # = \sum_d n_{dw} * exp{Elogtheta_{dk} + Elogbeta_{kw}} / phinorm_{dw}. delta_component = np.zeros(self.components_.shape) for delta in deltas: delta_component += delta delta_component *= self.expElogbeta else: delta_component = None return (gamma, delta_component)
def fit(self, X, Y, constraints=None): """Learn parameters using subgradient descent. Parameters ---------- X : iterable Traing instances. Contains the structured input objects. No requirement on the particular form of entries of X is made. Y : iterable Training labels. Contains the strctured labels for inputs in X. Needs to have the same length as X. constraints : None Discarded. Only for API compatibility currently. """ print("Training primal subgradient structural SVM") w = getattr(self, "w", np.zeros(self.problem.size_psi)) #constraints = [] loss_curve = [] objective_curve = [] n_samples = len(X) try: # catch ctrl+c to stop training for iteration in xrange(self.max_iter): positive_slacks = 0 objective = 0. verbose = max(0, self.verbose - 3) if self.n_jobs == 1: # online learning for x, y in zip(X, Y): y_hat, delta_psi, slack, loss = \ find_constraint(self.problem, x, y, w) objective += slack if slack > 0: positive_slacks += 1 w = self._solve_subgradient(w, delta_psi, n_samples) else: # generate batches of size n_jobs # to speed up inference if self.n_jobs == -1: n_jobs = cpu_count() else: n_jobs = self.j_jobs n_batches = int(np.ceil(float(len(X)) / n_jobs)) slices = gen_even_slices(n_samples, n_batches) for batch in slices: X_b = X[batch] Y_b = Y[batch] candidate_constraints = Parallel( n_jobs=self.n_jobs, verbose=verbose)(delayed(find_constraint)( self.problem, x, y, w) for x, y in zip(X_b, Y_b)) dpsi = np.zeros(self.problem.size_psi) for x, y, constraint in zip(X_b, Y_b, candidate_constraints): y_hat, delta_psi, slack, loss = constraint objective += slack dpsi += delta_psi if slack > 0: positive_slacks += 1 dpsi /= float(len(X_b)) w = self._solve_subgradient(w, dpsi, n_samples) # some statistics objective /= len(X) objective += np.sum(w ** 2) / self.C / 2. if positive_slacks == 0: print("No additional constraints") break if self.verbose > 0: print(self) print("iteration %d" % iteration) print("positive slacks: %d," "objective: %f" % (positive_slacks, objective)) objective_curve.append(objective) if self.verbose > 2: print(w) self._compute_training_loss(X, Y, w, iteration) except KeyboardInterrupt: pass self.w = w self.loss_curve_ = loss_curve self.objective_curve_ = objective_curve print("final objective: %f" % objective_curve[-1]) print("calls to inference: %d" % self.problem.inference_calls) return self
def rpbi_core(tested_var, target_vars, n_parcellations, parcellations_labels, n_parcels, confounding_vars=None, model_intercept=True, threshold=None, n_perm=1000, random_state=None, n_jobs=1): """Run RPBI from parcelled data. This is the core method for Randomized Parcellation Based Inference. Parameters ---------- tested_var : array-like, shape=(n_samples, 1), Explanatory variate, fitted and tested. target_vars : array-like, shape=(n_samples, n_parcels_tot) Average signal within parcels of all parcellations, for every subject. n_parcellations : int, Number of (randomized) parcellations. parcellations_labels : array-like, (n_parcellations * n_voxels,) All parcellation's labels ("labels to voxels" map). n_parcels : list of int, Number of parcels for the parcellations. confounding_vars : array-like, shape=(n_samples, n_confounds) Confounding variates (covariates), fitted but not tested. If None (default), no confounding variate is added to the model (except maybe a constant column according to the value of `model_intercept`) model_intercept : bool, If True (default), a constant column is added to the confounding variates unless the tested variate is already the intercept. threshold : float, 0. < threshold < 1., RPBI's threshold to discretize individual parcel-based analysis results. 'auto' (or None) correspond to a threshold of 0.1 divided by the number of parcels per parcellation. n_perm : int, n_perm > 1, Number of permutation to convert the counting statistic into p-values. The higher n_perm, the more precise the results, at the cost of computation time. random_state : int, Random numbers seed for reproducible results. n_jobs : int, Number of parallel workers. Default is 1. If 0 is provided, all CPUs are used. A negative number indicates that all the CPUs except (|n_jobs| - 1) ones must be used. Returns ------- p-values : np.ndarray, shape=(n_voxels,) Negative log10 p-values associated with the significance test of the explanatory variate against the target variate, assessed with Randomized Parcellation Based Inference. Family-wise corrected p-values (max-type procedure). counting_stats_original_data : np.ndarray, shape=(n_voxels,) Counting statistic (i.e. RPBI score) associated with original (non-permuted) data. h0 : np.ndarray, shape=(n_perm,) Maximum value of the counting statistic (i.e. RPBI score) across voxels obtained under each permutation of the original data. """ # initialize the seed of the random generator rng = check_random_state(random_state) # check n_jobs (number of CPUs) n_jobs = check_n_jobs(n_jobs) # make target_vars F-ordered to speed-up computation if target_vars.ndim != 2: raise ValueError("'target_vars' should be a 2D array. 
" "An array with %d dimension%s was passed" % (target_vars.ndim, "s" if target_vars.ndim > 1 else "")) target_vars = np.asfortranarray(target_vars) # check explanatory variates dimensions if tested_var.ndim == 1: tested_var = np.atleast_2d(tested_var).T n_samples = tested_var.shape[0] # check if explanatory variates is intercept (constant) or not if np.unique(tested_var).size == 1: intercept_test = True else: intercept_test = False # optionally add intercept if model_intercept and not intercept_test: if confounding_vars is not None: confounding_vars = np.hstack( (confounding_vars, np.ones((n_samples, 1)))) else: confounding_vars = np.ones((n_samples, 1)) # orthogonalize design to speed up subsequent permutations orthogonalized_design = orthogonalize_design(tested_var, target_vars, confounding_vars) tested_var_resid_covars = orthogonalized_design[0] target_vars_resid_covars = orthogonalized_design[1] covars_orthonormalized = orthogonalized_design[2] lost_dof = orthogonalized_design[3] # set RPBI threshold # In RPBI, only the scores for which the associated p-value is # below the threshold are considered (we use a F distribution as # an approximation of the scores distribution) if threshold == 'auto' or threshold is None: threshold = 0.1 / n_parcels # Bonferroni correction for parcels ### Permutation of the RPBI analysis # parallel computing units perform a reduced number of permutations each perm_chunks = [(x.start, x.stop) for x in gen_even_slices(n_perm + 1, min(n_perm, n_jobs))] all_chunks_results = joblib.Parallel(n_jobs=n_jobs)( joblib.delayed(_univariate_analysis_on_chunk) (n_perm, perm_chunk_start, perm_chunk_stop, tested_var_resid_covars, target_vars_resid_covars, covars_orthonormalized, lost_dof, intercept_test=intercept_test, sparsity_threshold=threshold, random_state=rng.random_integers(np.iinfo(np.int32).max)) for (perm_chunk_start, perm_chunk_stop) in perm_chunks) # reduce results (merge chunks in one big GrowableSparseArray) n_chunks = len(perm_chunks) max_elts_chunk = all_chunks_results[0].max_elts all_results = GrowableSparseArray(n_perm + 1, max_elts=max_elts_chunk * n_chunks) all_results.merge(all_chunks_results) # scores binarization (to be summed later to yield the counting statistic) all_results.data['data'] = binarize(all_results.get_data()['data']) ### Inverse transforms (map back masked voxels into a brain) n_voxels_all_parcellations = parcellations_labels.size n_voxels = n_voxels_all_parcellations / n_parcellations unique_labels_all_parcellations = np.unique(parcellations_labels) n_parcels_all_parcellations = len(unique_labels_all_parcellations) # build parcellations labels as masks. # we need a CSC sparse matrix for efficient computation. we can build # it efficiently using a COO sparse matrix constructor. 
voxel_ids = np.arange(n_voxels_all_parcellations) % n_voxels parcellation_masks = sparse.coo_matrix( (np.ones(n_voxels_all_parcellations), (parcellations_labels, voxel_ids)), shape=(n_parcels_all_parcellations, n_voxels), dtype=np.float32).tocsc() # slice permutations to treat them in parallel perm_lots_slices = [s for s in gen_even_slices(n_perm + 1, min(n_perm, n_jobs))] perm_lots_sizes = [np.sum(all_results.sizes[s]) for s in perm_lots_slices] perm_lots_cuts = np.concatenate(([0], np.cumsum(perm_lots_sizes))) perm_lots = [ all_results.get_data()[perm_lots_cuts[i]:perm_lots_cuts[i + 1]] for i in xrange(perm_lots_cuts.size - 1)] # put back parcel-based scores to voxel-level scale ret = joblib.Parallel(n_jobs=n_jobs)( joblib.delayed(_compute_counting_statistic_from_parcel_level_scores) (perm_lot, perm_lot_slice, parcellation_masks, n_parcellations, n_parcels_all_parcellations) for perm_lot, perm_lot_slice in zip(perm_lots, perm_lots_slices)) # reduce results counting_stats_original_data, h0 = zip(*ret) counting_stats_original_data = counting_stats_original_data[0] h0 = np.sort(np.concatenate(h0)) ### Convert H1 to neg. log. p-values p_values = - np.log10( (n_perm + 1 - np.searchsorted(h0, counting_stats_original_data)) / float(n_perm + 1)) return p_values, counting_stats_original_data, h0
def radius_neighbors(X=None, radius=None, return_distance=True): """Finds the neighbors within a given radius of a point or points. Return the indices and distances of each point from the dataset lying in a ball with size ``radius`` around the points of the query array. Points lying on the boundary are included in the results. The result points are *not* necessarily sorted by distance to their query point. Parameters ---------- X : array-like, (n_samples, n_features), optional The query point or points. If not provided, neighbors of each indexed point are returned. In this case, the query point is not considered its own neighbor. radius : float Limiting distance of neighbors to return. (default is the value passed to the constructor). return_distance : boolean, optional. Defaults to True. If False, distances will not be returned Returns ------- dist : array, shape (n_samples,) of arrays Array representing the distances to each point, only present if return_distance=True. The distance values are computed according to the ``metric`` constructor parameter. ind : array, shape (n_samples,) of arrays An array of arrays of indices of the approximate nearest points from the population matrix that lie within a ball of size ``radius`` around the query points. Examples -------- In the following example, we construct a NeighborsClassifier class from an array representing our data set and ask who's the closest point to [1, 1, 1]: >>> import numpy as np >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]] >>> from sklearn.neighbors import NearestNeighbors >>> neigh = NearestNeighbors(radius=1.6) >>> neigh.fit(samples) # doctest: +ELLIPSIS NearestNeighbors(algorithm='auto', leaf_size=30, ...) >>> rng = neigh.radius_neighbors([[1., 1., 1.]]) >>> print(np.asarray(rng[0][0])) # doctest: +ELLIPSIS [ 1.5 0.5] >>> print(np.asarray(rng[1][0])) # doctest: +ELLIPSIS [1 2] The first array returned contains the distances to all points which are closer than 1.6, while the second array returned contains their indices. In general, multiple points can be queried at the same time. Notes ----- Because the number of neighbors of each point is not necessarily equal, the results for multiple query points cannot be fit in a standard data array. For efficiency, `radius_neighbors` returns arrays of objects, where each object is a 1D array of indices or distances. """ n_samples = X.shape[0] from joblib import delayed, Parallel from sklearn.utils import gen_even_slices n_jobs = max(mp.cpu_count(), n_samples) print(gen_even_slices(X.shape[0], n_jobs)) fd = delayed(_func) dist = Parallel(n_jobs=n_jobs, verbose=0)( fd(X, X[s]) for s in gen_even_slices(X.shape[0], n_jobs)) neigh_ind_list = [np.where(d > 0)[0] for d in np.hstack(dist)] # print(neigh_ind_list) # See https://github.com/numpy/numpy/issues/5456 # if you want to understand why this is initialized this way. neigh_ind = np.empty(n_samples, dtype='object') neigh_ind[:] = neigh_ind_list return neigh_ind
def fit(self, X, Y, H_init=None): """Learn parameters using subgradient descent. Parameters ---------- X : iterable Traing instances. Contains the structured input objects. No requirement on the particular form of entries of X is made. Y : iterable Training labels. Contains the strctured labels for inputs in X. Needs to have the same length as X. constraints : None Discarded. Only for API compatibility currently. """ print("Training latent subgradient structural SVM") self.w = getattr(self, "w", np.random.normal( 0, .001, size=self.model.size_psi)) #constraints = [] self.objective_curve_ = [] n_samples = len(X) try: # catch ctrl+c to stop training for iteration in xrange(self.max_iter): positive_slacks = 0 objective = 0. #verbose = max(0, self.verbose - 3) if self.n_jobs == 1: # online learning for x, y in zip(X, Y): h = self.model.latent(x, y, self.w) h_hat = self.model.loss_augmented_inference( x, h, self.w, relaxed=True) delta_psi = (self.model.psi(x, h) - self.model.psi(x, h_hat)) slack = (-np.dot(delta_psi, self.w) + self.model.loss(h, h_hat)) objective += np.maximum(slack, 0) if slack > 0: positive_slacks += 1 self._solve_subgradient(delta_psi, n_samples) else: #generate batches of size n_jobs #to speed up inference if self.n_jobs == -1: n_jobs = cpu_count() else: n_jobs = self.j_jobs n_batches = int(np.ceil(float(len(X)) / n_jobs)) slices = gen_even_slices(n_samples, n_batches) for batch in slices: X_b = X[batch] Y_b = Y[batch] verbose = self.verbose - 1 candidate_constraints = Parallel( n_jobs=self.n_jobs, verbose=verbose)(delayed(find_constraint_latent)( self.model, x, y, self.w) for x, y in zip(X_b, Y_b)) dpsi = np.zeros(self.model.size_psi) for x, y, constraint in zip(X_b, Y_b, candidate_constraints): y_hat, delta_psi, slack, loss = constraint objective += slack dpsi += delta_psi if slack > 0: positive_slacks += 1 dpsi /= float(len(X_b)) self._solve_subgradient(dpsi, n_samples) # some statistics objective += np.sum(self.w ** 2) / self.C / 2. #objective /= float(n_samples) if positive_slacks == 0: print("No additional constraints") if self.break_on_no_constraints: break if self.verbose > 0: print(self) print("iteration %d" % iteration) print("positive slacks: %d, " "objective: %f" % (positive_slacks, objective)) self.objective_curve_.append(objective) if self.verbose > 2: print(self.w) self._compute_training_loss(X, Y, iteration) if self.logger is not None: self.logger(self, iteration) except KeyboardInterrupt: pass print("final objective: %f" % self.objective_curve_[-1]) print("calls to inference: %d" % self.model.inference_calls) return self
def fit(self, X, Y, constraints=None):
    """Learn parameters using cutting plane method.

    Parameters
    ----------
    X : iterable
        Training instances. Contains the structured input objects.
        No requirement on the particular form of entries of X is made.

    Y : iterable
        Training labels. Contains the structured labels for inputs in X.
        Needs to have the same length as X.

    constraints : iterable
        Known constraints for warm-starts. List of same length as X.
        Each entry is itself a list of constraints for a given instance x.
        Each constraint is of the form [y_hat, delta_psi, loss], where
        y_hat is a labeling, ``delta_psi = psi(x, y) - psi(x, y_hat)``
        and loss is the loss for predicting y_hat instead of the true
        label y.
    """
    print("Training n-slack dual structural SVM")
    if self.verbose < 2:
        cvxopt.solvers.options['show_progress'] = False
    else:
        cvxopt.solvers.options['show_progress'] = True

    self.w = np.zeros(self.model.size_psi)
    n_samples = len(X)
    if constraints is None:
        constraints = [[] for i in xrange(n_samples)]
    else:
        objective = self._solve_n_slack_qp(constraints, n_samples)
    loss_curve = []
    objective_curve = []
    self.alphas = []  # dual solutions
    # we have to update at least once after going through the dataset
    for iteration in xrange(self.max_iter):
        # main loop
        if self.verbose > 0:
            print("iteration %d" % iteration)
        new_constraints = 0
        # generate slices through dataset from batch_size
        if self.batch_size < 1 and not self.batch_size == -1:
            raise ValueError("batch_size should be integer >= 1 or -1, "
                             "got %s." % str(self.batch_size))
        batch_size = self.batch_size if self.batch_size != -1 else len(X)
        n_batches = int(np.ceil(float(len(X)) / batch_size))
        slices = gen_even_slices(n_samples, n_batches)
        indices = np.arange(n_samples)
        for batch in slices:
            new_constraints_batch = 0
            verbose = max(0, self.verbose - 3)
            X_b = X[batch]
            Y_b = Y[batch]
            indices_b = indices[batch]
            candidate_constraints = Parallel(n_jobs=self.n_jobs,
                                             verbose=verbose)(
                delayed(find_constraint)(self.model, x, y, self.w)
                for x, y in zip(X_b, Y_b))

            # for each slice, gather new constraints
            for i, x, y, constraint in zip(indices_b, X_b, Y_b,
                                           candidate_constraints):
                # loop over dataset
                y_hat, delta_psi, slack, loss = constraint
                if self.verbose > 3:
                    print("current slack: %f" % slack)

                if not loss > 0:
                    # can have y != y_hat but loss = 0 in latent svm.
                    # we need this here as dpsi is then != 0
                    continue

                if self._check_bad_constraint(y_hat, slack,
                                              constraints[i]):
                    continue

                constraints[i].append([y_hat, delta_psi, loss])
                new_constraints_batch += 1

            # after processing the slice, solve the qp
            if new_constraints_batch:
                objective = self._solve_n_slack_qp(constraints, n_samples)
                objective_curve.append(objective)
                new_constraints += new_constraints_batch

        if new_constraints == 0:
            print("no additional constraints")
            break

        self._compute_training_loss(X, Y, iteration)

        if self.verbose > 0:
            print("new constraints: %d, "
                  "dual objective: %f" % (new_constraints, objective))

        if (iteration > 1 and objective_curve[-1]
                - objective_curve[-2] < self.tol):
            print("objective converged.")
            break
        if self.verbose > 5:
            print(self.w)

        if self.logger is not None:
            self.logger(self, iteration)

    self.constraints_ = constraints
    self.loss_curve_ = loss_curve
    self.objective_curve_ = objective_curve
    print("calls to inference: %d" % self.model.inference_calls)
    return self
def sparse_encode(X, dictionary, gram=None, cov=None, algorithm='lasso_lars',
                  n_nonzero_coefs=None, alpha=None, copy_cov=True, init=None,
                  max_iter=1000, n_jobs=1):
    """Sparse coding

    Each row of the result is the solution to a sparse coding problem.
    The goal is to find a sparse array `code` such that::

        X ~= code * dictionary

    Parameters
    ----------
    X: array of shape (n_samples, n_features)
        Data matrix

    dictionary: array of shape (n_components, n_features)
        The dictionary matrix against which to solve the sparse coding of
        the data. Some of the algorithms assume normalized rows for
        meaningful output.

    gram: array, shape=(n_components, n_components)
        Precomputed Gram matrix, dictionary * dictionary'

    cov: array, shape=(n_components, n_samples)
        Precomputed covariance, dictionary' * X

    algorithm: {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}
        lars: uses the least angle regression method
        (linear_model.lars_path)
        lasso_lars: uses Lars to compute the Lasso solution
        lasso_cd: uses the coordinate descent method to compute the
        Lasso solution (linear_model.Lasso). lasso_lars will be faster
        if the estimated components are sparse.
        omp: uses orthogonal matching pursuit to estimate the sparse
        solution
        threshold: squashes to zero all coefficients less than alpha from
        the projection dictionary * X'

    n_nonzero_coefs: int, 0.1 * n_features by default
        Number of nonzero coefficients to target in each column of the
        solution. This is only used by `algorithm='lars'` and
        `algorithm='omp'` and is overridden by `alpha` in the `omp` case.

    alpha: float, 1. by default
        If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is
        the penalty applied to the L1 norm.
        If `algorithm='threshold'`, `alpha` is the absolute value of the
        threshold below which coefficients will be squashed to zero.
        If `algorithm='omp'`, `alpha` is the tolerance parameter: the value
        of the reconstruction error targeted. In this case, it overrides
        `n_nonzero_coefs`.

    init: array of shape (n_samples, n_components)
        Initialization value of the sparse codes. Only used if
        `algorithm='lasso_cd'`.

    max_iter: int, 1000 by default
        Maximum number of iterations to perform if `algorithm='lasso_cd'`.

    copy_cov: boolean, optional
        Whether to copy the precomputed covariance matrix; if False, it may
        be overwritten.

    n_jobs: int, optional
        Number of parallel jobs to run.

    Returns
    -------
    code: array of shape (n_samples, n_components)
        The sparse codes

    See also
    --------
    sklearn.linear_model.lars_path
    sklearn.linear_model.orthogonal_mp
    sklearn.linear_model.Lasso
    SparseCoder
    """
    dictionary = array2d(dictionary)
    X = array2d(X)
    n_samples, n_features = X.shape
    n_components = dictionary.shape[0]

    if gram is None and algorithm != 'threshold':
        gram = np.dot(dictionary, dictionary.T)
    if cov is None:
        copy_cov = False
        cov = np.dot(dictionary, X.T)

    if algorithm in ('lars', 'omp'):
        regularization = n_nonzero_coefs
        if regularization is None:
            regularization = max(n_features // 10, 1)
    else:
        regularization = alpha
        if regularization is None:
            regularization = 1.

    if n_jobs == 1 or algorithm == 'threshold':
        return _sparse_encode(X, dictionary, gram, cov=cov,
                              algorithm=algorithm,
                              regularization=regularization,
                              copy_cov=copy_cov, init=init,
                              max_iter=max_iter)

    # Enter parallel code block
    code = np.empty((n_samples, n_components))
    slices = list(gen_even_slices(n_samples, n_jobs))

    code_views = Parallel(n_jobs=n_jobs)(
        delayed(_sparse_encode)(
            X[this_slice], dictionary, gram, cov[:, this_slice], algorithm,
            regularization=regularization, copy_cov=copy_cov,
            init=init[this_slice] if init is not None else None,
            max_iter=max_iter)
        for this_slice in slices)
    for this_slice, this_view in zip(slices, code_views):
        code[this_slice] = this_view
    return code
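When n_jobs > 1, the rows of X are split with gen_even_slices, each chunk is
encoded in its own joblib worker, and the per-chunk codes are stitched back
into a single array. A minimal usage sketch, assuming the public
sklearn.decomposition.sparse_encode entry point available in current
scikit-learn releases (the random data and sizes are illustration only):

import numpy as np
from sklearn.decomposition import sparse_encode

rng = np.random.RandomState(0)
dictionary = rng.randn(8, 12)        # (n_components, n_features)
X = rng.randn(30, 12)                # (n_samples, n_features)

# Rows of X are divided among 2 workers via gen_even_slices and the
# resulting code blocks are concatenated in the original row order.
code = sparse_encode(X, dictionary, algorithm='omp',
                     n_nonzero_coefs=3, n_jobs=2)
print(code.shape)                    # (30, 8)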
def fit(self, X, y=None):
    """Fit the model to the data X.

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    self
    """
    X = atleast2d_or_csr(X, dtype=np.float64, order="C")

    n_samples, n_features = X.shape
    self._init_fit(n_features)
    self._init_param()
    self._init_t_eta_()

    if self.shuffle_data:
        X, y = shuffle(X, y, random_state=self.random_state)

    # l-bfgs does not use mini-batches
    if self.algorithm == 'l-bfgs':
        batch_size = n_samples
    else:
        batch_size = np.clip(self.batch_size, 0, n_samples)
        n_batches = n_samples // batch_size
        batch_slices = list(
            gen_even_slices(
                n_batches * batch_size,
                n_batches))

    # preallocate memory
    a_hidden = np.empty((batch_size, self.n_hidden))
    a_output = np.empty((batch_size, n_features))
    delta_o = np.empty((batch_size, n_features))

    if self.algorithm == 'sgd':
        prev_cost = np.inf

        for i in xrange(self.max_iter):
            for batch_slice in batch_slices:
                cost = self.backprop_sgd(
                    X[batch_slice], n_features, batch_size,
                    delta_o, a_hidden, a_output)
            if self.verbose:
                print("Iteration %d, cost = %.2f" % (i, cost))
            if abs(cost - prev_cost) < self.tol:
                break
            prev_cost = cost
            self.t_ += 1

    elif self.algorithm == 'l-bfgs':
        self._backprop_lbfgs(
            X, n_features, a_hidden, a_output,
            delta_o, n_samples)
    return self
def fit(self, X, Y, constraints=None, warm_start=None, initialize=True):
    """Learn parameters using cutting plane method.

    Parameters
    ----------
    X : iterable
        Training instances. Contains the structured input objects.
        No requirement on the particular form of entries of X is made.

    Y : iterable
        Training labels. Contains the structured labels for inputs in X.
        Needs to have the same length as X.

    constraints : iterable
        Known constraints for warm-starts. List of same length as X.
        Each entry is itself a list of constraints for a given instance x.
        Each constraint is of the form
        [y_hat, delta_joint_feature, loss], where y_hat is a labeling,
        ``delta_joint_feature = joint_feature(x, y) -
        joint_feature(x, y_hat)`` and loss is the loss for predicting
        y_hat instead of the true label y.

    initialize : boolean, default=True
        Whether to initialize the model for the data.
        Leave this true except if you really know what you are doing.
    """
    if self.verbose:
        print("Training n-slack dual structural SVM")

    cvxopt.solvers.options['show_progress'] = self.verbose > 3

    if initialize:
        self.model.initialize(X, Y)
    self.w = np.zeros(self.model.size_joint_feature)
    n_samples = len(X)
    stopping_criterion = False
    if constraints is None:
        # fresh start
        constraints = [[] for i in range(n_samples)]
        self.last_active = [[] for i in range(n_samples)]
        self.objective_curve_ = []
        self.primal_objective_curve_ = []
        self.timestamps_ = [time()]
    else:
        # warm start
        objective = self._solve_n_slack_qp(constraints, n_samples)
    try:
        # catch ctrl+c to stop training
        # we have to update at least once after going through the dataset
        for iteration in range(self.max_iter):
            # main loop
            self.timestamps_.append(time() - self.timestamps_[0])
            if self.verbose > 0:
                print("iteration %d" % iteration)
            if self.verbose > 2:
                print(self)
            new_constraints = 0
            # generate slices through dataset from batch_size
            if self.batch_size < 1 and not self.batch_size == -1:
                raise ValueError("batch_size should be integer >= 1 or -1, "
                                 "got %s." % str(self.batch_size))
            batch_size = (self.batch_size if self.batch_size != -1
                          else len(X))
            n_batches = int(np.ceil(float(len(X)) / batch_size))
            slices = gen_even_slices(n_samples, n_batches)
            indices = np.arange(n_samples)
            slack_sum = 0
            for batch in slices:
                new_constraints_batch = 0
                verbose = max(0, self.verbose - 3)
                X_b = X[batch]
                Y_b = Y[batch]
                indices_b = indices[batch]
                candidate_constraints = Parallel(
                    n_jobs=self.n_jobs, verbose=verbose)(
                        delayed(find_constraint)(self.model, x, y, self.w)
                        for x, y in zip(X_b, Y_b))

                # for each batch, gather new constraints
                for i, x, y, constraint in zip(indices_b, X_b, Y_b,
                                               candidate_constraints):
                    # loop over samples in batch
                    y_hat, delta_joint_feature, slack, loss = constraint
                    slack_sum += slack

                    if self.verbose > 3:
                        print("current slack: %f" % slack)

                    if not loss > 0:
                        # can have y != y_hat but loss = 0 in latent svm.
                        # we need this here as djoint_feature is then != 0
                        continue

                    if self._check_bad_constraint(y_hat, slack,
                                                  constraints[i]):
                        continue

                    constraints[i].append([y_hat, delta_joint_feature,
                                           loss])
                    new_constraints_batch += 1

                # after processing the slice, solve the qp
                if new_constraints_batch:
                    objective = self._solve_n_slack_qp(constraints,
                                                       n_samples)
                    new_constraints += new_constraints_batch

            self.objective_curve_.append(objective)
            self._compute_training_loss(X, Y, iteration)

            primal_objective = (self.C * slack_sum
                                + np.sum(self.w ** 2) / 2)
            self.primal_objective_curve_.append(primal_objective)

            if self.verbose > 0:
                print("new constraints: %d, "
                      "cutting plane objective: %f primal objective: %f" %
                      (new_constraints, objective, primal_objective))

            if new_constraints == 0:
                if self.verbose:
                    print("no additional constraints")
                stopping_criterion = True

            if (iteration > 1 and self.objective_curve_[-1]
                    - self.objective_curve_[-2] < self.tol):
                if self.verbose:
                    print("objective converged.")
                stopping_criterion = True

            if stopping_criterion:
                if (self.switch_to is not None and
                        self.model.inference_method != self.switch_to):
                    if self.verbose:
                        print("Switching to %s inference" %
                              str(self.switch_to))
                    self.model.inference_method_ = \
                        self.model.inference_method
                    self.model.inference_method = self.switch_to
                    stopping_criterion = False
                    continue
                else:
                    break

            if self.verbose > 5:
                print(self.w)

            if self.logger is not None:
                self.logger(self, iteration)
    except KeyboardInterrupt:
        pass

    self.constraints_ = constraints
    if self.verbose and self.n_jobs == 1:
        print("calls to inference: %d" % self.model.inference_calls)
    if verbose:
        print("Computing final objective.")
    self.timestamps_.append(time() - self.timestamps_[0])
    self.primal_objective_curve_.append(self._objective(X, Y))
    self.objective_curve_.append(objective)
    if self.logger is not None:
        self.logger(self, 'final')
    return self
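The batch handling above means that batch_size=-1 collapses the cutting-plane
pass into a single batch (one QP solve per sweep over the data), while a
positive batch_size re-solves the QP after every slice. A small numeric
illustration, assuming scikit-learn's gen_even_slices and made-up sizes:

import numpy as np
from sklearn.utils import gen_even_slices

n_samples = 7
for batch_size in (-1, 2):
    effective = n_samples if batch_size == -1 else batch_size
    n_batches = int(np.ceil(float(n_samples) / effective))
    print(batch_size, list(gen_even_slices(n_samples, n_batches)))
# batch_size=-1 -> one slice covering all 7 samples (single QP per pass)
# batch_size=2  -> four nearly even slices of sizes 2, 2, 2, 1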
def fit(self, X, y):
    """Fit the model to the data X and target y.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples
        and n_features is the number of features.

    y : numpy array of shape (n_samples, n_outputs)
        Subset of the target values.

    Returns
    -------
    self
    """
    X = atleast2d_or_csr(X)

    self._validate_params()
    n_samples, self.n_features = X.shape
    self.n_outputs = y.shape[1]

    if not self.warm_start:
        self._init_t_eta_()
        self._init_fit()
        self._init_param()
    else:
        if self.t_ is None or self.coef_hidden_ is None:
            self._init_t_eta_()
            self._init_fit()
            self._init_param()

    if self.shuffle:
        X, y = shuffle(X, y, random_state=self.random_state)

    # l-bfgs does not use mini-batches
    if self.algorithm == 'l-bfgs':
        batch_size = n_samples
    else:
        batch_size = np.clip(self.batch_size, 0, n_samples)
        n_batches = n_samples // batch_size
        batch_slices = list(
            gen_even_slices(
                n_batches * batch_size,
                n_batches))

    # preallocate memory
    a_hidden, a_output, delta_o = self._preallocate_memory(batch_size)

    if self.algorithm == 'sgd':
        prev_cost = np.inf

        for i in xrange(self.max_iter):
            for batch_slice in batch_slices:
                cost = self._backprop_sgd(
                    X[batch_slice], y[batch_slice], batch_size,
                    a_hidden, a_output, delta_o)
            if self.verbose:
                print("Iteration %d, cost = %.2f" % (i, cost))
            if abs(cost - prev_cost) < self.tol:
                break
            prev_cost = cost
            self.t_ += 1

    elif self.algorithm == 'l-bfgs':
        self._backprop_lbfgs(
            X, y, n_samples, a_hidden, a_output, delta_o)
    return self
def fit(self, X, Y, H_init=None, warm_start=False, initialize=True):
    """Learn parameters using subgradient descent.

    Parameters
    ----------
    X : iterable
        Training instances. Contains the structured input objects.
        No requirement on the particular form of entries of X is made.

    Y : iterable
        Training labels. Contains the structured labels for inputs in X.
        Needs to have the same length as X.

    constraints : None
        Discarded. Only for API compatibility currently.

    warm_start : boolean, default=False
        Whether to restart a previous fit.

    initialize : boolean, default=True
        Whether to initialize the model for the data.
        Leave this true except if you really know what you are doing.
    """
    if self.verbose > 0:
        print("Training latent subgradient structural SVM")
    if initialize:
        self.model.initialize(X, Y)
    self.grad_old = np.zeros(self.model.size_joint_feature)
    if not warm_start:
        self.w = getattr(self, "w", np.random.normal(
            0, 1, size=self.model.size_joint_feature))
        self.timestamps_ = [time()]
        self.objective_curve_ = []
        if self.learning_rate == "auto":
            self.learning_rate_ = self.C * len(X)
        else:
            self.learning_rate_ = self.learning_rate
    else:
        # hackety hack
        self.timestamps_[0] = time() - self.timestamps_[-1]
    w = self.w.copy()
    n_samples = len(X)
    try:
        # catch ctrl+c to stop training
        for iteration in xrange(self.max_iter):
            self.timestamps_.append(time() - self.timestamps_[0])
            positive_slacks = 0
            objective = 0.
            # verbose = max(0, self.verbose - 3)
            if self.n_jobs == 1:
                # online learning
                for x, y in zip(X, Y):
                    h = self.model.latent(x, y, w)
                    h_hat = self.model.loss_augmented_inference(
                        x, h, w, relaxed=True)
                    delta_joint_feature = (
                        self.model.joint_feature(x, h)
                        - self.model.joint_feature(x, h_hat))
                    slack = (-np.dot(delta_joint_feature, w)
                             + self.model.loss(h, h_hat))
                    objective += np.maximum(slack, 0)
                    if slack > 0:
                        positive_slacks += 1
                    w = self._solve_subgradient(delta_joint_feature,
                                                n_samples, w)
            else:
                # generate batches of size n_jobs
                # to speed up inference
                if self.n_jobs == -1:
                    n_jobs = cpu_count()
                else:
                    n_jobs = self.n_jobs

                n_batches = int(np.ceil(float(len(X)) / n_jobs))
                slices = gen_even_slices(n_samples, n_batches)
                for batch in slices:
                    X_b = X[batch]
                    Y_b = Y[batch]
                    verbose = self.verbose - 1
                    candidate_constraints = Parallel(
                        n_jobs=self.n_jobs,
                        verbose=verbose)(delayed(find_constraint_latent)(
                            self.model, x, y, w)
                            for x, y in zip(X_b, Y_b))
                    djoint_feature = np.zeros(
                        self.model.size_joint_feature)
                    for x, y, constraint in zip(X_b, Y_b,
                                                candidate_constraints):
                        y_hat, delta_joint_feature, slack, loss = \
                            constraint
                        objective += slack
                        djoint_feature += delta_joint_feature
                        if slack > 0:
                            positive_slacks += 1
                    djoint_feature /= float(len(X_b))
                    w = self._solve_subgradient(djoint_feature,
                                                n_samples, w)

            # some statistics
            objective *= self.C
            objective += np.sum(self.w ** 2) / 2.

            if positive_slacks == 0:
                print("No additional constraints")
                if self.break_on_no_constraints:
                    break
            if self.verbose > 0:
                print(self)
                print("iteration %d" % iteration)
                print("positive slacks: %d, "
                      "objective: %f" % (positive_slacks, objective))
            self.objective_curve_.append(objective)

            if self.verbose > 2:
                print(self.w)

            self._compute_training_loss(X, Y, iteration)
            if self.logger is not None:
                self.logger(self, iteration)
    except KeyboardInterrupt:
        pass
    self.timestamps_.append(time() - self.timestamps_[0])
    self.objective_curve_.append(self._objective(X, Y))
    if self.logger is not None:
        self.logger(self, 'final')
    if self.verbose:
        if self.objective_curve_:
            print("final objective: %f" % self.objective_curve_[-1])
        if self.verbose and self.n_jobs == 1:
            print("calls to inference: %d" % self.model.inference_calls)
    return self
def fit(self, X, validation=None):
    """Fit the model to the data X.

    Parameters
    ----------
    X : {array-like, sparse matrix} shape (n_samples, n_features)
        Training data.

    validation : {array-like, sparse matrix}, optional
        Held-out data passed to the periodic wellness check.

    Returns
    -------
    self : BernoulliRBM
        The fitted model.
    """
    X = check_array(X, accept_sparse='csr', dtype=np.float64)
    n_samples = X.shape[0]

    if not hasattr(self, 'components_'):
        self.components_ = np.asarray(
            self.rng_.normal(0, 0.01, (self.n_components, X.shape[1])),
            order='F')
        self.intercept_hidden_ = np.zeros(self.n_components, )
        # 'It is usually helpful to initialize the bias of visible unit i
        # to log[p_i/(1-p_i)] where p_i is the proportion of training
        # vectors where i is on' - Practical Guide
        # TODO: Make this configurable?
        if 1:
            counts = X.sum(axis=0).A.reshape(-1)
            # There should be no units that are always on
            assert np.max(counts) < X.shape[0], \
                "Found a visible unit always on in the training data. Fishy."
            # There might be some units never on. Add a pseudo-count of 1
            # to avoid inf
            vis_priors = (counts + 1) / float(X.shape[0])
            self.intercept_visible_ = np.log(vis_priors / (1 - vis_priors))
        else:
            self.intercept_visible_ = np.zeros(X.shape[1], )
    # If this already *does* have weights and biases before fit() is
    # called, we'll start from them rather than wiping them out. May want
    # to train a model further with a different learning rate, or even on
    # a different dataset.
    else:
        print("Reusing existing weights and biases")

    # Don't necessarily want to reuse h_samples if we have one leftover
    # from before - batch size might have changed
    self.h_samples_ = np.zeros((self.batch_size * self.fantasy_to_batch,
                                self.n_components))

    # Add new inner lists for this session
    if not hasattr(self, 'history'):
        self.history = {'pseudo-likelihood': [], 'overfit': []}
    for session in self.history.values():
        session.append([])

    n_batches = int(np.ceil(float(n_samples) / self.batch_size))
    batch_slices = list(gen_even_slices(n_batches * self.batch_size,
                                        n_batches, n_samples))
    verbose = self.verbose
    begin = time.time()
    for iteration in xrange(1, self.n_iter + 1):
        if self.lr_backoff:
            # If, e.g., we're doing 10 epochs, use the full learning rate
            # for the first iteration, 90% of the base learning rate for
            # the second iteration... and 10% for the final iteration
            self.learning_rate = ((self.n_iter - (iteration - 1)) /
                                  (self.n_iter + 0.0)) * self.base_learning_rate
            print("Using learning rate of {:.3f} (base LR={:.3f})".format(
                self.learning_rate, self.base_learning_rate))

        for batch_slice in batch_slices:
            self._fit(X[batch_slice])

        if verbose and iteration != self.n_iter:
            end = time.time()
            self.wellness_check(iteration, end - begin, X, validation)
            begin = end
        if iteration != self.n_iter:
            X = shuffle(X)

    return self
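Unlike the batching in the earlier fit methods, the RBM above passes a third
argument to gen_even_slices so the final slice is clipped at n_samples rather
than running past the end of the data. A short, self-contained illustration
of that behaviour, assuming scikit-learn's gen_even_slices and made-up sizes:

import numpy as np
from sklearn.utils import gen_even_slices

n_samples, batch_size = 10, 4
n_batches = int(np.ceil(float(n_samples) / batch_size))
print(list(gen_even_slices(n_batches * batch_size, n_batches,
                           n_samples=n_samples)))
# [slice(0, 4, None), slice(4, 8, None), slice(8, 10, None)]
# The nominal grid covers 12 positions, but the last slice is clipped at
# n_samples, so the final mini-batch simply holds the two remaining rows.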