def omp_n_predict(X, centroids, num_active):
    '''omp prediction

    Input:
        X: the data matrix
        centroids: the centroids matrix
        num_active: the number of active components
    '''
    idx = np.empty((X.shape[0], num_active), dtype=np.int)
    val = np.zeros((X.shape[0], num_active))
    C = mathutil.dot(centroids, centroids.T)
    # find the most active k
    dots = None
    dots_abs = None
    for start in range(0, X.shape[0], _MINIBATCH):
        end = min(start + _MINIBATCH, X.shape[0])
        batchsize = end - start
        if dots is None:
            dots = mathutil.dot(X[start:end], centroids.T)
            dots_abs = np.empty_like(dots)
        else:
            mathutil.dot(X[start:end], centroids.T, out=dots[:batchsize])
        # we only compute the dots once, and keep them for future use
        for i in range(num_active):
            np.abs(dots, out=dots_abs)
            idx[start:end, i] = np.argmax(dots_abs[:batchsize], axis=1)
            val[start:end, i] = dots[np.arange(batchsize), idx[start:end, i]]
            # remove the effect from dots
            dots[:batchsize] -= C[idx[start:end, i]] * \
                val[start:end, i][:, np.newaxis]
    return idx, val
def testdot_with_out(self):
    for A, B in self.test_matrices:
        result_ref = np.dot(A, B)
        result = np.empty(result_ref.shape, dtype=A.dtype)
        mathutil.dot(A, B, out=result)
        self.assertTrue(result.flags['C_CONTIGUOUS'])
        np.testing.assert_array_almost_equal(result, result_ref)
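# The test above (and testdot further down) compares mathutil.dot against
# np.dot and checks that the result is C-contiguous. The library's own
# implementation is not part of this excerpt; a minimal sketch of a compatible
# wrapper (an assumption, not the actual mathutil code) could be:
def dot_sketch(A, B, out=None):
    """Hypothetical stand-in for mathutil.dot: np.dot with an optional
    preallocated, C-contiguous output buffer."""
    if out is None:
        return np.ascontiguousarray(np.dot(A, B))
    np.dot(A, B, out=out)
    return out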
def obj(wb, solver):
    """The objective function used by fmin"""
    # obtain w and b
    K = solver._K
    dim = solver._dim
    w = wb[:K * dim].reshape((dim, K))
    b = wb[K * dim:]
    # pred is a matrix of size [num_datalocal, K]
    pred = mathutil.dot(solver._X, w)
    pred += b
    # compute the loss function
    flocal, gpred = solver.loss(solver._Y, pred, solver._weight,
                                **solver._lossargs)
    glocal = np.empty(wb.shape)
    glocal[:K * dim] = mathutil.dot(solver._X.T, gpred).flat
    glocal[K * dim:] = gpred.sum(axis=0)
    # add regularization term, but keep in mind that we have multiple nodes
    freg, greg = solver.reg(w, **solver._regargs)
    flocal += solver._num_data * solver._gamma * freg / mpi.SIZE
    glocal[:K * dim] += solver._num_data * solver._gamma / mpi.SIZE \
        * greg.ravel()
    # do mpi reduction
    mpi.barrier()
    f = mpi.COMM.allreduce(flocal)
    g = np.empty(glocal.shape, dtype=glocal.dtype)
    mpi.COMM.Allreduce(glocal, g)
    return f, g
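# obj returns the (f, g) pair that gradient-based scipy optimizers expect.
# The driver code is not part of this excerpt; a hedged sketch of how obj
# might be minimized (wb0 is a hypothetical initial flat parameter vector of
# length K * dim + K) could be:
from scipy.optimize import fmin_l_bfgs_b

wb_opt, f_opt, info = fmin_l_bfgs_b(obj, wb0, args=(solver,))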
def process(self, image):
    '''Performs llc encoding.'''
    K = self.specs.get('k', 5)
    reg = self.specs.get('reg', 1e-4)
    D = self.dictionary
    shape = image.shape[:-1]
    X = image.reshape((np.prod(shape), image.shape[-1]))
    # D_norm is the precomputed norm of the entries
    if 'D_norm' not in self.specs:
        self.specs['D_norm'] = (D**2).sum(1) / 2.
    D_norm = self.specs['D_norm']
    distance = mathutil.dot(X, -D.T)
    distance += D_norm
    # find the K closest indices
    if bn is not None:
        # use bottleneck which would be faster
        IDX = bn.argpartsort(distance, K, axis=1)[:, :K]
    else:
        IDX = np.argsort(distance, 1)[:, :K]
    # do LLC approximate coding
    coeff = np.zeros((X.shape[0], D.shape[0]))
    ONES = np.ones(K)
    Z = np.empty((K, D.shape[1]))
    for i in range(X.shape[0]):
        # shift to origin
        Z[:] = D[IDX[i]]
        Z -= X[i]
        # local covariance
        C = mathutil.dot(Z, Z.T)
        # add regularization
        C.flat[::K + 1] += reg * C.trace()
        w = np.linalg.solve(C, ONES)
        coeff[i][IDX[i]] = w / w.sum()
    return coeff.reshape(shape + (coeff.shape[1],))
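# For a single descriptor, the loop above solves a small regularized K x K
# system C w = 1 and normalizes the solution so it sums to one. A plain-numpy
# sketch of one row's LLC code (a hypothetical standalone helper equivalent to
# the loop body above) could be:
def llc_code_one_row(x, D, nearest_idx, reg=1e-4):
    Z = D[nearest_idx] - x                              # shift selected atoms to the origin
    C = np.dot(Z, Z.T)                                  # local covariance
    C.flat[::len(nearest_idx) + 1] += reg * C.trace()   # regularize the diagonal
    w = np.linalg.solve(C, np.ones(len(nearest_idx)))
    return w / w.sum()                                  # coefficients sum to one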
def get_predictions_nn(X, weights, arch):
    hid = mathutil.dot(X, weights[0]) + weights[1]
    hid = 1.0 / (1 + np.exp(-hid))  # sigmoid
    pred = mathutil.dot(hid, weights[2]) + weights[3]
    #prob = pred - pred.max(axis=1)[:, np.newaxis]
    #mathutil.exp(prob, out=prob)
    #prob /= prob.sum(axis=1)[:, np.newaxis]
    prob = 1.0 / (1.0 + np.exp(-pred))
    prob = mpi.COMM.gather(prob)
    hid = mpi.COMM.gather(hid)
    if mpi.is_root():
        return np.vstack(prob), np.vstack(hid)
    else:
        return np.zeros((0)), np.zeros((0))
def coclassify_dag(self, prob, classifier_weight=1.):
    """Perform co-classification when the hidden concept is organized as
    a DAG.
    """
    if self.graph is None:
        raise ValueError("No graph given.")
    # compute log(prob(y|s)) first
    prob_ys = mathutil.dot(prob, self.invconfprob.T)
    prob_ys /= prob_ys.sum(1)[:, np.newaxis]
    prob_ys += np.finfo(np.float64).eps
    np.log(prob_ys, out=prob_ys)
    # using the tree structure to compute the max_{y \in c} prob_ys for each
    # concept. Basically, we need to follow the inverse topological order.
    max_prob_ys_in_c = np.zeros(
        (prob_ys.shape[0], len(self.graph.nodes())))
    for c in self.invtoporder:
        cid = self.concept2id[c]
        succ = [self.concept2id[s] for s in self.graph.successors(c)]
        if len(succ) == 0:
            # I am a leaf node
            max_prob_ys_in_c[:, cid] = prob_ys[:, self.leaf2id[c]]
        else:
            max_prob_ys_in_c[:, cid] = np.max(
                max_prob_ys_in_c[:, succ], axis=1)
    # now, combine the upstream probability and downstream probability to
    # find the argmax
    score = self.logprior + max_prob_ys_in_c.sum(0) * classifier_weight \
        - self.log_concept_sizes * prob.shape[0]
    best_cid = score.argmax()
    slice = np.array(list(self.membership_map[best_cid]))
    best_labels = slice[prob_ys[:, slice].argmax(1)]
    return best_cid, best_labels
def obj(wb, solver):
    '''The objective function used by fmin'''
    # obtain w and b
    K = solver._K
    dim = solver._dim
    w = wb[:K * dim].reshape((dim, K))
    b = wb[K * dim:]
    # pred is a matrix of size [num_datalocal, K]
    mathutil.dot(solver._X, w, out=solver._pred)
    solver._pred += b
    # compute the loss function
    if solver.gpredcache:
        flocal, gpred = solver.loss(solver._Y, solver._pred, solver._weight,
                                    solver._gpred, solver._gpredcache,
                                    **solver._lossargs)
    else:
        flocal, gpred = solver.loss(solver._Y, solver._pred, solver._weight,
                                    **solver._lossargs)
    mathutil.dot(solver._X.T, gpred,
                 out=solver._glocal[:K * dim].reshape(dim, K))
    solver._glocal[K * dim:] = gpred.sum(axis=0)
    # we should normalize them with the number of data
    flocal /= solver._num_data
    solver._glocal /= solver._num_data
    # add regularization term, but keep in mind that we have multiple nodes,
    # so we only carry it out on root to make sure we only add one
    # regularization term
    if mpi.is_root():
        freg, greg = solver.reg(w, **solver._regargs)
        flocal += solver._gamma * freg
        solver._glocal[:K * dim] += solver._gamma * greg.ravel()
    # do mpi reduction
    mpi.barrier()
    f = mpi.COMM.allreduce(flocal)
    mpi.COMM.Allreduce(solver._glocal, solver._g)
    ######### DEBUG PART ##############
    if np.isnan(f):
        # check all the components to see what went wrong.
        print('rank %s: isnan X: %d' % (mpi.RANK, np.any(np.isnan(solver._X))))
        print('rank %s: isnan Y: %d' % (mpi.RANK, np.any(np.isnan(solver._Y))))
        print('rank %s: isnan flocal: %d' % (mpi.RANK, np.any(np.isnan(flocal))))
        print('rank %s: isnan pred: %d' % (mpi.RANK, np.any(np.isnan(solver._pred))))
        print('rank %s: isnan w: %d' % (mpi.RANK, np.any(np.isnan(w))))
        print('rank %s: isnan b: %d' % (mpi.RANK, np.any(np.isnan(b))))
    return f, solver._g
def soap(self, X, out=None):
    """Performs second-order average pooling on an n*k matrix X.
    Returns a k*(k+1)/2 vector that is the upper triangular part of the
    average pooled region.
    """
    X = np.ascontiguousarray(X, dtype=np.float64)
    self._update_cache(X)
    if self._use_cov:
        self._cache_gram_matrix = np.cov(X, rowvar=0)
    else:
        mathutil.dot(X.T, X, out=self._cache_gram_matrix)
        self._cache_gram_matrix /= X.shape[0]
    if out is None:
        out = np.empty(X.shape[1] * (X.shape[1] + 1) // 2)
    out[:] = self._cache_gram_matrix[self._cache_triu_id[0],
                                     self._cache_triu_id[1]]
    return out
def log_soap(self, X, out=None):
    """Performs matrix_log(second order average pooling)."""
    X = np.ascontiguousarray(X, dtype=np.float64)
    self._update_cache(X)
    if self._use_cov:
        self._cache_gram_matrix = np.cov(X, rowvar=0)
    else:
        mathutil.dot(X.T, X, out=self._cache_gram_matrix)
        self._cache_gram_matrix /= X.shape[0]
    self._cache_gram_matrix.flat[::X.shape[1] + 1] += self._reg
    # compute the matrix log
    logmat = scipy.linalg.logm(self._cache_gram_matrix)
    if out is None:
        out = np.empty(X.shape[1] * (X.shape[1] + 1) // 2)
    out[:] = logmat[self._cache_triu_id[0], self._cache_triu_id[1]]
    return out
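# Both pooling methods read the upper triangle of the gram matrix through
# cached index arrays. Assuming _update_cache builds them with np.triu_indices
# (that helper is not shown in this excerpt), an equivalent standalone
# computation of the plain SOAP vector could be:
def soap_reference(X):
    gram = np.dot(X.T, X) / X.shape[0]     # second-order average pooling
    iu = np.triu_indices(X.shape[1])       # upper-triangular indices
    return gram[iu]                        # k*(k+1)/2 vector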
def get_predictions_logreg_perclass(X, weights):
    pred = mathutil.dot(X, weights[0]) + weights[1]
    prob = 1.0 / (1.0 + np.exp(-pred))
    prob = mpi.COMM.gather(prob)
    if mpi.is_root():
        return np.vstack(prob)
    else:
        return np.zeros((0))
def get_predictions_logreg(X, weights):
    pred = mathutil.dot(X, weights[0]) + weights[1]
    prob = pred - pred.max(axis=1)[:, np.newaxis]
    mathutil.exp(prob, out=prob)
    prob /= prob.sum(axis=1)[:, np.newaxis]
    prob = mpi.COMM.gather(prob)
    if mpi.is_root():
        return np.vstack(prob)
    else:
        return np.zeros((0))
def coclassify_oracle(self, prob, cid):
    """Perform coclassification when the hidden concept is just given
    in cid.
    """
    prob_ys = mathutil.dot(prob, self.invconfprob.T)
    prob_ys /= prob_ys.sum(1)[:, np.newaxis]
    prob_ys += np.finfo(np.float64).eps
    np.log(prob_ys, out=prob_ys)
    slice = np.array(list(self.membership_map[cid]))
    best_labels = slice[prob_ys[:, slice].argmax(1)]
    return cid, best_labels
def obj(wb, solver):
    '''The objective function used by fmin'''
    # obtain w and b
    Khidden = solver._Khidden
    dim = solver._dim
    whidden = wb[:Khidden * dim].reshape((dim, Khidden))
    tree = solver._regargs['tree']
    w = mathutil.dot(whidden, tree)
    b = wb[Khidden * dim:]
    # pred is a matrix of size [num_datalocal, K]
    mathutil.dot(solver._X, w, out=solver._pred)
    solver._pred += b
    # compute the loss function
    flocal, gpred = solver.loss(solver._Y, solver._pred, solver._weight,
                                **solver._lossargs)
    mathutil.dot(mathutil.dot(solver._X.T, gpred), tree.T,
                 out=solver._glocal[:Khidden * dim].reshape(dim, Khidden))
    solver._glocal[Khidden * dim:] = gpred.sum(axis=0)
    # add regularization term, but keep in mind that we have multiple nodes
    freg, greg = solver.reg(whidden, **solver._regargs)
    flocal += solver._num_data * solver._gamma * freg / mpi.SIZE
    solver._glocal[:Khidden * dim] += solver._num_data * solver._gamma \
        * greg.ravel() / mpi.SIZE
    # do mpi reduction
    mpi.barrier()
    f = mpi.COMM.allreduce(flocal)
    mpi.COMM.Allreduce(solver._glocal, solver._g)
    return f, solver._g
def process(self, image, out=None):
    '''Performs llc encoding.'''
    K = self.specs.get('k', 5)
    reg = self.specs.get('reg', 1e-4)
    D = self.dictionary
    shape = image.shape[:-1]
    X = image.reshape((np.prod(shape), image.shape[-1]))
    # D_norm is the precomputed norm of the entries
    if 'D_norm' not in self.specs:
        self.specs['D_norm'] = (D**2).sum(1) / 2.
    D_norm = self.specs['D_norm']
    distance = mathutil.dot(X, -D.T)
    distance += D_norm
    # find the K closest indices
    if bn is not None:
        # use bottleneck which would be faster
        IDX = bn.argpartsort(distance, K, axis=1)[:, :K]
    else:
        IDX = np.argsort(distance, 1)[:, :K]
    # do LLC approximate coding
    if out is None:
        out = np.zeros((X.shape[0], D.shape[0]))
    else:
        out.resize((X.shape[0], D.shape[0]))
        out[:] = 0
    ONES = np.ones(K)
    Z = np.empty((K, D.shape[1]))
    for i in range(X.shape[0]):
        # shift to origin
        Z[:] = D[IDX[i]]
        Z -= X[i]
        # local covariance
        C = mathutil.dot(Z, Z.T)
        # add regularization
        C.flat[::K + 1] += reg * C.trace()
        w = np.linalg.solve(C, ONES)
        out[i][IDX[i]] = w / w.sum()
    out.resize(shape + (out.shape[1],))
    return out
def omp_n_maximize(X, labels, val, k):
    """Maximization of omp_n, with the given labels and vals.

    Note that X is the local data hosted in each MPI node.
    """
    dim = X.shape[1]
    # G is the gram matrix of the activations
    AtA_local = np.zeros((k, k))
    AtX_local = np.zeros((k, dim))
    A = None
    for start in range(0, X.shape[0], _MINIBATCH):
        end = min(start + _MINIBATCH, X.shape[0])
        batchsize = end - start
        if A is None:
            A = np.zeros((batchsize, k))
        else:
            A[:] = 0
        for i in range(batchsize):
            A[i, labels[start + i]] = val[start + i]
        AtA_local += mathutil.dot(A.T, A)
        AtX_local += mathutil.dot(A[:batchsize].T, X[start:end])
    AtA = np.empty_like(AtA_local)
    AtX = np.empty_like(AtX_local)
    mpi.COMM.Allreduce(AtA_local, AtA)
    mpi.COMM.Allreduce(AtX_local, AtX)
    # add a regularization term
    isempty = (np.diag(AtA) == 0)
    AtA.flat[::k + 1] += 1e-8
    centroids = np.ascontiguousarray(np.linalg.solve(AtA, AtX))
    # let's deal with inactive guys
    for i in range(k):
        if isempty[i]:
            # randomly restart one
            centroids[i] = X[np.random.randint(X.shape[0])]
            mpi.COMM.Bcast(centroids[i], root=mpi.elect())
    scale = np.sqrt((centroids ** 2).sum(1)) + np.finfo(np.float64).eps
    centroids /= scale[:, np.newaxis]
    return centroids
def omp1_predict(X, centroids):
    '''omp1 prediction

    This function does one-dimensional orthogonal matching pursuit.
    The returned values are simply going to be the indices and inner
    products.
    '''
    idx = np.empty(X.shape[0], dtype=np.int)
    val = np.empty(X.shape[0])
    # in case we are going to deal with a large matrix, we buffer dots to
    # avoid multiple memory new / deletes.
    dots = np.empty((min(_MINIBATCH, X.shape[0]), centroids.shape[0]),
                    dtype=X.dtype)
    for start in range(0, X.shape[0], _MINIBATCH):
        end = min(start + _MINIBATCH, X.shape[0])
        batchsize = end - start
        mathutil.dot(X[start:end], centroids.T, out=dots[:batchsize])
        np.abs(dots, out=dots)
        idx[start:end] = np.argmax(dots[:batchsize], axis=1)
        val[start:end] = dots[range(batchsize), idx[start:end]]
    mpi.barrier()
    return idx, val
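# For reference, the minibatched loop above finds, for each row of X, the
# centroid with the largest absolute inner product and records that magnitude.
# A plain-numpy equivalent on a small input (a hypothetical helper without
# mathutil, MPI, or buffering) could be:
def omp1_predict_reference(X, centroids):
    dots = np.abs(np.dot(X, centroids.T))      # |inner products|, shape (n, k)
    idx = np.argmax(dots, axis=1)              # most active centroid per row
    val = dots[np.arange(X.shape[0]), idx]     # corresponding activation magnitude
    return idx, val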
def train(self, incoming_patches):
    size = mpi.COMM.allreduce(incoming_patches.shape[0])
    b = - mpi.COMM.allreduce(np.sum(incoming_patches, axis=0)) / size
    # remove the mean from data
    patches = incoming_patches + b
    covmat = mpi.COMM.allreduce(mathutil.dot(patches.T, patches)) / size
    if mpi.RANK == 0:
        eigval, eigvec = np.linalg.eigh(covmat)
        reg = self.specs.get('reg', np.finfo(np.float64).eps)
        W = eigvec * 1.0 / (np.sqrt(np.maximum(eigval, 0.0)) + reg)
    else:
        eigval, eigvec, W = None, None, None
    W = mpi.COMM.bcast(W)
    eigval = mpi.COMM.bcast(eigval)
    eigvec = mpi.COMM.bcast(eigvec)
    return (W, b), (eigval, eigvec)
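# The returned (W, b) pair is arranged so that applying the learned whitening
# to new data is a mean shift followed by a single projection. A usage sketch
# (hypothetical variable names, not part of this excerpt) could be:
(W, b), (eigval, eigvec) = whitener.train(patches)
whitened = mathutil.dot(patches + b, W)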
confmats = None
for i in range(mpi.RANK, len(files), mpi.SIZE):
    # for the i-th file the gt label is i
    file = files[i]
    predfile = preds[i]
    fid = h5py.File(file, 'r')
    features = np.array(fid['features'])
    fid.close()
    fid = h5py.File(predfile, 'r')
    pred = np.array(fid['pred'])
    fid.close()
    # compute new prediction
    diff = mathutil.softmax(pred)
    diff[:, i] -= 1
    features **= 2
    weighted_direction = mathutil.dot(features, Hwinv) + Hbinv
    dpred = diff * weighted_direction
    if use_validation:
        iter = 0
        accu = sum((val_pred == i) & (val_label == i)) / \
            float(sum(val_label == i))
        s_low = 0
        s_high = scales[0]
        while iter < 10:
            s = (s_low + s_high) / 2
            newprob = pred + dpred * s
            mathutil.softmax(newprob, out=newprob)
            newpred = newprob.argmax(1)
            my_accu = sum(newpred == i) / float(pred.shape[0])
            if my_accu > accu:
                s_low = s
def forward(X, outputs):
    output = mathutil.dot(X, w)
    output += b
def testdot(self):
    for A, B in self.test_matrices:
        result = mathutil.dot(A, B)
        result_ref = np.dot(A, B)
        self.assertTrue(result.flags['C_CONTIGUOUS'])
        np.testing.assert_array_almost_equal(result, result_ref)
files = glob.glob(os.path.join(FLAGS.folder, '*.mat'))
files.sort()
count = 0
for i in range(mpi.RANK, len(files), mpi.SIZE):
    file = files[i]
    print('%d / %d: %s' % (i, len(files), file))
    fid = h5py.File(file, 'r')
    features = np.array(fid['features'])
    count += features.shape[0]
    pred = np.dot(features, w)
    pred += b
    prob = mathutil.softmax(pred)
    dpdp = prob * (1 - prob)
    # the gradient for b is simple
    hb += np.sum(dpdp, 0)
    features **= 2
    hw += mathutil.dot(features.T, dpdp)
    fid.close()
    del features
count = mpi.COMM.allreduce(count)
# we no longer need w and b, so we use them as mpi buffers
mpi.COMM.Allreduce(hw, w)
mpi.COMM.Allreduce(hb, b)
if mpi.is_root():
    hw /= count
    hb /= count
    # add regularization term
    hw += 2 * FLAGS.reg
    np.savez(FLAGS.output, count=count, hw=w, hb=b)
def loss_multiclass_logreg(Y, X, weights):
    pred = mathutil.dot(X, weights[0]) + weights[1]
    local_likelihood = classifier.Loss.loss_multiclass_logistic(
        classifier.to_one_of_k_coding(Y, 0), pred, None)[0]
    likelihood = mpi.COMM.allreduce(local_likelihood)
    num_data = mpi.COMM.allreduce(len(Y))
    return float(likelihood) / num_data