def test_triangle_code():
    """Compare the symbolic ``triangle_code`` with a direct NumPy computation.

    The reference activation for an example/centroid pair is
    ``max(0, mean_distance - distance)``, where ``mean_distance`` is the
    average Euclidean distance from that example to all centroids.
    """
    rng = np.random.RandomState([20, 18, 9])
    num_examples, num_features, num_centroids = 5, 6, 7

    # Examples are drawn before centroids so the random stream is consumed
    # in a fixed order.
    examples = as_floatX(rng.randn(num_examples, num_features))
    centroids = as_floatX(rng.randn(num_centroids, num_features))

    centroid_sq_norms = np.sum(centroids ** 2, axis=1)
    example_sq_norms = np.sum(examples ** 2, axis=1)

    # ||x - d||^2 expanded as -2 x.d + ||d||^2 + ||x||^2; the squared example
    # norms are turned into a column so they broadcast across centroids.
    cross_term = -2.0 * np.dot(examples, centroids.T)
    sq_dist = cross_term + centroid_sq_norms + example_sq_norms[:, np.newaxis]
    dist = np.sqrt(sq_dist)

    mean_dist = np.mean(dist, axis=1)
    expected = np.maximum(0.0, mean_dist.reshape(mean_dist.size, 1) - dist)

    # Build and evaluate the symbolic version on the same inputs.
    examples_var = T.matrix()
    centroids_var = T.matrix()
    code_var = triangle_code(X=examples_var, centroids=centroids_var)
    compute_code = function([examples_var, centroids_var], code_var)
    actual = compute_code(examples, centroids)

    assert np.allclose(expected, actual)
def _execute(self):
    """Extract pooled triangle-code features for a dataset and save them.

    Configuration is read from attributes of ``self``:
    ``num_output_features``, ``idxs``, ``top``, ``bottom``, ``left``,
    ``right``, ``save_path``, ``batch_size``, ``dataset_family``,
    ``which_set``, ``model``, ``size``, ``restrict``, ``pool_mode``,
    ``chunk_size`` and ``chunk_id``. The module-level ``num_superpixels``
    gives the side length of the pooling grid.

    For every batch the method extracts grid patches, computes
    ``triangle_code`` against ``model.mu``, reassembles the per-patch
    features into a topological view, pools them onto a
    ``num_superpixels`` x ``num_superpixels`` grid, and then reduces the
    rectangle ``top[j]:bottom[j]+1, left[j]:right[j]+1`` of channel
    ``idxs[j]`` to a single value for output feature ``j``.

    The result, a float32 array of shape
    ``(num_examples, num_output_features)``, is written with ``np.save``.
    When ``self.chunk_size`` is not None the file name gets a
    ``_<letter>`` suffix derived from ``self.chunk_id``.

    A warning is issued if any NaN features were encountered; those
    entries are replaced by 0 before pooling.
    """
    global num_superpixels

    num_output_features = self.num_output_features
    idxs = self.idxs
    top = self.top
    bottom = self.bottom
    left = self.left
    right = self.right

    save_path = self.save_path
    batch_size = self.batch_size
    dataset_family = self.dataset_family
    which_set = self.which_set
    model = self.model
    size = self.size

    # Running count of NaN feature values seen across all batches.
    nan = 0

    dataset_descriptor = dataset_family[which_set][size]
    dataset = dataset_descriptor.dataset_maker()
    expected_num_examples = dataset_descriptor.num_examples

    full_X = dataset.get_design_matrix()
    num_examples = full_X.shape[0]
    assert num_examples == expected_num_examples

    if self.restrict is not None:
        assert self.restrict[1] <= full_X.shape[0]
        print('restricting to examples ',self.restrict[0],' through ',self.restrict[1],' exclusive')
        full_X = full_X[self.restrict[0]:self.restrict[1], :]
        assert self.restrict[1] > self.restrict[0]

    # Update for after restriction.
    num_examples = full_X.shape[0]
    assert num_examples > 0

    # The design matrix is held in full_X from here on; clear these fields
    # on the dataset before it is shallow-copied once per batch below.
    dataset.X = None
    dataset.design_loc = None
    dataset.compress = False

    # Replace the first pipeline stage with a dense grid-patch extractor
    # (stride 1 in both directions).
    patchifier = ExtractGridPatches(patch_shape=(size, size),
                                    patch_stride=(1, 1))
    pipeline = serial.load(dataset_descriptor.pipeline_path)
    assert isinstance(pipeline.items[0], ExtractPatches)
    pipeline.items[0] = patchifier

    print('defining features')
    V = T.matrix('V')
    mu = model.mu
    feat = triangle_code(V, mu)
    assert feat.dtype == 'float32'

    print('compiling theano function')
    f = function([V], feat)

    nhid = model.mu.get_value().shape[0]

    if config.device.startswith('gpu') and nhid >= 4000:
        f = halver(f, model.nhid)

    # Compiled reduction over the two spatial axes of a 4-D float32 tensor.
    topo_feat_var = T.TensorType(broadcastable=(False, False, False, False),
                                 dtype='float32')()
    if self.pool_mode == 'mean':
        region_features = function([topo_feat_var],
                                   topo_feat_var.mean(axis=(1, 2)))
    elif self.pool_mode == 'max':
        region_features = function([topo_feat_var],
                                   topo_feat_var.max(axis=(1, 2)))
    else:
        assert False

    def average_pool(stride):
        """Pool the current ``topo_feat`` onto a stride x stride grid.

        ``topo_feat`` and ``ns`` are looked up in the enclosing scope when
        this is called, so it always sees the current batch.
        """
        # Floor division keeps the slice bounds integral even when "/" is
        # true division; float bounds are rejected by NumPy slicing.
        edges = [p * ns // stride for p in xrange(stride + 1)]

        rval = np.zeros((topo_feat.shape[0], stride, stride,
                         topo_feat.shape[3]), dtype='float32')

        for i in xrange(stride):
            for j in xrange(stride):
                rval[:, i, j, :] = region_features(
                    topo_feat[:, edges[i]:edges[i + 1],
                              edges[j]:edges[j + 1], :])

        return rval

    output = np.zeros((num_examples, num_output_features), dtype='float32')

    # Template dataset used to turn per-patch features into a topological
    # view; it is shallow-copied for every batch.
    fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'),
                           view_converter=DefaultViewConverter([1, 1, nhid]))

    # Number of patch positions per side.
    # NOTE(review): 32 is presumably the input image side length - confirm.
    ns = 32 - size + 1
    depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                         patch_shape=(1, 1))

    # Diagnostic output for the case where no batch would be processed.
    if len(range(0, num_examples - batch_size + 1, batch_size)) <= 0:
        print(num_examples)
        print(batch_size)

    for i in xrange(0, num_examples - batch_size + 1, batch_size):
        print(i)
        t1 = time.time()

        d = copy.copy(dataset)
        d.set_design_matrix(full_X[i:i + batch_size, :])

        t2 = time.time()

        # Apply the patch-extraction pipeline without refitting it.
        d.apply_preprocessor(pipeline, can_fit=False)
        X2 = d.get_design_matrix()

        t3 = time.time()

        # Run the compiled feature function on the patches.
        feat = f(X2)

        t4 = time.time()

        assert feat.dtype == 'float32'

        feat_dataset = copy.copy(fd)

        if contains_nan(feat):
            # Count the NaNs and zero them, computing the mask only once.
            nan_mask = np.isnan(feat)
            nan += nan_mask.sum()
            feat[nan_mask] = 0

        feat_dataset.set_design_matrix(feat)

        # Reassemble per-patch features and read them back as a 4-D view.
        feat_dataset.apply_preprocessor(depatchifier)
        topo_feat = feat_dataset.get_topological_view()
        assert topo_feat.shape[0] == batch_size

        t5 = time.time()

        # Pool onto the superpixel grid.
        superpixels = average_pool(num_superpixels)

        # The scalar reductions below collapse the batch axis too, so they
        # are only meaningful for a single example.
        assert batch_size == 1

        if self.pool_mode == 'mean':
            for j in xrange(num_output_features):
                output[i:i + batch_size, j] = superpixels[
                    :, top[j]:bottom[j] + 1,
                    left[j]:right[j] + 1, idxs[j]].mean()
        elif self.pool_mode == 'max':
            for j in xrange(num_output_features):
                output[i:i + batch_size, j] = superpixels[
                    :, top[j]:bottom[j] + 1,
                    left[j]:right[j] + 1, idxs[j]].max()
        else:
            assert False

        assert output[i:i + batch_size, :].max() < 1e20

        t6 = time.time()
        # Total time for the batch followed by the duration of each stage.
        print((t6-t1, t2-t1, t3-t2, t4-t3, t5-t4, t6-t5))

    if self.chunk_size is not None:
        # Insert "_<letter>" before the extension to identify this chunk.
        assert save_path.endswith('.npy')
        save_path_pieces = save_path.split('.npy')
        assert len(save_path_pieces) == 2
        assert save_path_pieces[1] == ''
        save_path = (save_path_pieces[0] + '_' +
                     chr(ord('A') + self.chunk_id) + '.npy')
    np.save(save_path, output)

    if nan > 0:
        warnings.warn(str(nan)+' features were nan')
def _execute(self): global num_superpixels num_output_features = self.num_output_features idxs = self.idxs top = self.top bottom = self.bottom left = self.left right = self.right save_path = self.save_path batch_size = self.batch_size dataset_family = self.dataset_family which_set = self.which_set model = self.model size = self.size nan = 0 dataset_descriptor = dataset_family[which_set][size] dataset = dataset_descriptor.dataset_maker() expected_num_examples = dataset_descriptor.num_examples full_X = dataset.get_design_matrix() num_examples = full_X.shape[0] assert num_examples == expected_num_examples if self.restrict is not None: assert self.restrict[1] <= full_X.shape[0] print 'restricting to examples ', self.restrict[ 0], ' through ', self.restrict[1], ' exclusive' full_X = full_X[self.restrict[0]:self.restrict[1], :] assert self.restrict[1] > self.restrict[0] #update for after restriction num_examples = full_X.shape[0] assert num_examples > 0 dataset.X = None dataset.design_loc = None dataset.compress = False patchifier = ExtractGridPatches(patch_shape=(size, size), patch_stride=(1, 1)) pipeline = serial.load(dataset_descriptor.pipeline_path) assert isinstance(pipeline.items[0], ExtractPatches) pipeline.items[0] = patchifier print 'defining features' V = T.matrix('V') mu = model.mu feat = triangle_code(V, mu) assert feat.dtype == 'float32' print 'compiling theano function' f = function([V], feat) nhid = model.mu.get_value().shape[0] if config.device.startswith('gpu') and nhid >= 4000: f = halver(f, model.nhid) topo_feat_var = T.TensorType(broadcastable=(False, False, False, False), dtype='float32')() if self.pool_mode == 'mean': region_features = function([topo_feat_var], topo_feat_var.mean(axis=(1, 2))) elif self.pool_mode == 'max': region_features = function([topo_feat_var], topo_feat_var.max(axis=(1, 2))) else: assert False def average_pool(stride): def point(p): return p * ns / stride rval = np.zeros( (topo_feat.shape[0], stride, stride, 
topo_feat.shape[3]), dtype='float32') for i in xrange(stride): for j in xrange(stride): rval[:, i, j, :] = region_features( topo_feat[:, point(i):point(i + 1), point(j):point(j + 1), :]) return rval output = np.zeros((num_examples, num_output_features), dtype='float32') fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'), view_converter=DefaultViewConverter( [1, 1, nhid])) ns = 32 - size + 1 depatchifier = ReassembleGridPatches(orig_shape=(ns, ns), patch_shape=(1, 1)) if len(range(0, num_examples - batch_size + 1, batch_size)) <= 0: print num_examples print batch_size for i in xrange(0, num_examples - batch_size + 1, batch_size): print i t1 = time.time() d = copy.copy(dataset) d.set_design_matrix(full_X[i:i + batch_size, :]) t2 = time.time() #print '\tapplying preprocessor' d.apply_preprocessor(pipeline, can_fit=False) X2 = d.get_design_matrix() t3 = time.time() #print '\trunning theano function' feat = f(X2) t4 = time.time() assert feat.dtype == 'float32' feat_dataset = copy.copy(fd) if np.any(np.isnan(feat)): nan += np.isnan(feat).sum() feat[np.isnan(feat)] = 0 feat_dataset.set_design_matrix(feat) #print '\treassembling features' feat_dataset.apply_preprocessor(depatchifier) #print '\tmaking topological view' topo_feat = feat_dataset.get_topological_view() assert topo_feat.shape[0] == batch_size t5 = time.time() #average pooling superpixels = average_pool(num_superpixels) assert batch_size == 1 if self.pool_mode == 'mean': for j in xrange(num_output_features): output[i:i + batch_size, j] = superpixels[:, top[j]:bottom[j] + 1, left[j]:right[j] + 1, idxs[j]].mean() elif self.pool_mode == 'max': for j in xrange(num_output_features): output[i:i + batch_size, j] = superpixels[:, top[j]:bottom[j] + 1, left[j]:right[j] + 1, idxs[j]].max() else: assert False assert output[i:i + batch_size, :].max() < 1e20 t6 = time.time() print(t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5) if self.chunk_size is not None: assert save_path.endswith('.npy') save_path_pieces = 
save_path.split('.npy') assert len(save_path_pieces) == 2 assert save_path_pieces[1] == '' save_path = save_path_pieces[0] + '_' + chr( ord('A') + self.chunk_id) + '.npy' np.save(save_path, output) if nan > 0: warnings.warn(str(nan) + ' features were nan')