def test_extract_reassemble():
    """
    Check that ReassembleGridPatches undoes ExtractGridPatches.

    A random topological batch is run through the extractor and then the
    reassembler; the resulting topological view must have the same shape and
    exactly the same values as the input.
    """
    patch_shape = (3, 7)
    rng = np.random.RandomState([1, 3, 7])
    # Both spatial dimensions are exact multiples of the patch shape.
    topo = rng.randn(4, 3 * 5, 3 * 7, 2)
    dataset = DenseDesignMatrix(topo_view=topo)

    # The second positional argument equals the patch shape
    # (presumably the stride, i.e. non-overlapping patches -- TODO confirm).
    preprocessors = (
        ExtractGridPatches(patch_shape, patch_shape),
        ReassembleGridPatches(patch_shape=patch_shape,
                              orig_shape=topo.shape[1:3]),
    )
    for preprocessor in preprocessors:
        dataset.apply_preprocessor(preprocessor)

    new_topo = dataset.get_topological_view()
    assert new_topo.shape == topo.shape
    assert np.all(new_topo == topo)
else: assert False print 'compiling theano function' f = function([V], feat) print 'running theano function' feat = f(X2) feat_dataset = DenseDesignMatrix(X=feat, view_converter=DefaultViewConverter( [1, 1, feat.shape[1]])) print 'reassembling features' ns = 32 - size + 1 depatchifier = ReassembleGridPatches(orig_shape=(ns, ns), patch_shape=(1, 1)) feat_dataset.apply_preprocessor(depatchifier) print 'making topological view' topo_feat = feat_dataset.get_topological_view() assert topo_feat.shape[0] == X.shape[0] print 'assembling visualizer' n = np.ceil(np.sqrt(model.nhid)) pv3 = PatchViewer(grid_shape=(X.shape[0], num_filters), patch_shape=(ns, ns), is_color=False) pv4 = PatchViewer(grid_shape=(n, n), patch_shape=(size, size),
def __call__(self, full_X): feature_type = self.feature_type pooling_region_counts = self.pooling_region_counts model = self.model size = self.size nan = 0 full_X = full_X.reshape(1, full_X.shape[0], full_X.shape[1], full_X.shape[2]) if full_X.shape[3] == 1: full_X = np.concatenate((full_X, full_X, full_X), axis=3) print 'full_X.shape: ' + str(full_X.shape) num_examples = full_X.shape[0] assert num_examples == 1 pipeline = self.preprocessor def average_pool(stride): def point(p): return p * ns / stride rval = np.zeros( (topo_feat.shape[0], stride, stride, topo_feat.shape[3]), dtype='float32') for i in xrange(stride): for j in xrange(stride): rval[:, i, j, :] = self.region_features( topo_feat[:, point(i):point(i + 1), point(j):point(j + 1), :]) return rval outputs = [ np.zeros((num_examples, count, count, model.nhid), dtype='float32') for count in pooling_region_counts ] assert len(outputs) > 0 fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'), view_converter=DefaultViewConverter( [1, 1, model.nhid])) ns = 32 - size + 1 depatchifier = ReassembleGridPatches(orig_shape=(ns, ns), patch_shape=(1, 1)) batch_size = 1 for i in xrange(0, num_examples - batch_size + 1, batch_size): print i t1 = time.time() d = DenseDesignMatrix( topo_view=np.cast['float32'](full_X[i:i + batch_size, :]), view_converter=DefaultViewConverter((32, 32, 3))) t2 = time.time() #print '\tapplying preprocessor' d.apply_preprocessor(pipeline, can_fit=False) X2 = d.get_design_matrix() t3 = time.time() #print '\trunning theano function' feat = self.f(X2) t4 = time.time() assert feat.dtype == 'float32' feat_dataset = copy.copy(fd) if np.any(np.isnan(feat)): nan += np.isnan(feat).sum() feat[np.isnan(feat)] = 0 feat_dataset.set_design_matrix(feat) #print '\treassembling features' feat_dataset.apply_preprocessor(depatchifier) #print '\tmaking topological view' topo_feat = feat_dataset.get_topological_view() assert topo_feat.shape[0] == batch_size t5 = time.time() #average pooling for output, count 
in zip(outputs, pooling_region_counts): output[i:i + batch_size, ...] = average_pool(count) t6 = time.time() print(t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5) return outputs[0]
def _execute(self): global pooling_matrix save_path = self.save_path batch_size = self.batch_size feature_type = self.feature_type dataset_family = self.dataset_family which_set = self.which_set model = self.model size = self.size nan = 0 dataset_descriptor = dataset_family[which_set][size] dataset = dataset_descriptor.dataset_maker() expected_num_examples = dataset_descriptor.num_examples full_X = dataset.get_design_matrix() num_examples = full_X.shape[0] assert num_examples == expected_num_examples if self.restrict is not None: assert self.restrict[1] <= full_X.shape[0] print 'restricting to examples ', self.restrict[ 0], ' through ', self.restrict[1], ' exclusive' full_X = full_X[self.restrict[0]:self.restrict[1], :] assert self.restrict[1] > self.restrict[0] #update for after restriction num_examples = full_X.shape[0] assert num_examples > 0 dataset.X = None dataset.design_loc = None dataset.compress = False patchifier = ExtractGridPatches(patch_shape=(size, size), patch_stride=(1, 1)) pipeline = serial.load(dataset_descriptor.pipeline_path) assert isinstance(pipeline.items[0], ExtractPatches) pipeline.items[0] = patchifier print 'defining features' V = T.matrix('V') model.make_pseudoparams() d = model.e_step.variational_inference(V=V) H = d['H_hat'] Mu1 = d['S_hat'] assert H.dtype == 'float32' assert Mu1.dtype == 'float32' if self.feature_type == 'map_hs': feat = (H > 0.5) * Mu1 elif self.feature_type == 'map_h': feat = T.cast(H > 0.5, dtype='float32') elif self.feature_type == 'exp_hs': feat = H * Mu1 elif self.feature_type == 'exp_h': feat = H elif self.feature_type == 'exp_h_thresh': feat = H * (H > .01) else: raise NotImplementedError() assert feat.dtype == 'float32' print 'compiling theano function' f = function([V], feat) if config.device.startswith('gpu') and model.nhid >= 4000: f = halver(f, model.nhid) topo_feat_var = T.TensorType(broadcastable=(False, False, False, False), dtype='float32')() region_features = function([topo_feat_var], 
topo_feat_var.mean(axis=(1, 2))) def average_pool(stride): def point(p): return p * ns / stride rval = np.zeros( (topo_feat.shape[0], stride, stride, topo_feat.shape[3]), dtype='float32') for i in xrange(stride): for j in xrange(stride): rval[:, i, j, :] = region_features( topo_feat[:, point(i):point(i + 1), point(j):point(j + 1), :]) return rval num_superpixels = 7 output = np.zeros((num_examples, pooling_matrix.shape[0]), dtype='float32') fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'), view_converter=DefaultViewConverter( [1, 1, model.nhid])) ns = 32 - size + 1 depatchifier = ReassembleGridPatches(orig_shape=(ns, ns), patch_shape=(1, 1)) if len(range(0, num_examples - batch_size + 1, batch_size)) <= 0: print num_examples print batch_size for i in xrange(0, num_examples - batch_size + 1, batch_size): print i t1 = time.time() d = copy.copy(dataset) d.set_design_matrix(full_X[i:i + batch_size, :]) t2 = time.time() #print '\tapplying preprocessor' d.apply_preprocessor(pipeline, can_fit=False) X2 = d.get_design_matrix() t3 = time.time() #print '\trunning theano function' feat = f(X2) t4 = time.time() assert feat.dtype == 'float32' feat_dataset = copy.copy(fd) if np.any(np.isnan(feat)): nan += np.isnan(feat).sum() feat[np.isnan(feat)] = 0 feat_dataset.set_design_matrix(feat) #print '\treassembling features' feat_dataset.apply_preprocessor(depatchifier) #print '\tmaking topological view' topo_feat = feat_dataset.get_topological_view() assert topo_feat.shape[0] == batch_size t5 = time.time() #average pooling superpixels = average_pool(num_superpixels) pooled = pooling_matrix.dot(superpixels.T).T output[i:i + batch_size, :] = pooled t6 = time.time() print(t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5) if self.chunk_size is not None: assert save_path.endswith('.npy') save_path_pieces = save_path.split('.npy') assert len(save_path_pieces) == 2 assert save_path_pieces[1] == '' save_path = save_path_pieces[0] + '_' + chr( ord('A') + self.chunk_id) + '.npy' 
np.save(save_path, output) if nan > 0: warnings.warn(str(nan) + ' features were nan')
def _execute(self): global num_superpixels num_output_features = self.num_output_features idxs = self.idxs top = self.top bottom = self.bottom left = self.left right = self.right save_path = self.save_path batch_size = self.batch_size dataset_family = self.dataset_family which_set = self.which_set model = self.model size = self.size nan = 0 dataset_descriptor = dataset_family[which_set][size] dataset = dataset_descriptor.dataset_maker() expected_num_examples = dataset_descriptor.num_examples full_X = dataset.get_design_matrix() num_examples = full_X.shape[0] assert num_examples == expected_num_examples if self.restrict is not None: assert self.restrict[1] <= full_X.shape[0] print 'restricting to examples ', self.restrict[ 0], ' through ', self.restrict[1], ' exclusive' full_X = full_X[self.restrict[0]:self.restrict[1], :] assert self.restrict[1] > self.restrict[0] #update for after restriction num_examples = full_X.shape[0] assert num_examples > 0 dataset.X = None dataset.design_loc = None dataset.compress = False patchifier = ExtractGridPatches(patch_shape=(size, size), patch_stride=(1, 1)) pipeline = serial.load(dataset_descriptor.pipeline_path) assert isinstance(pipeline.items[0], ExtractPatches) pipeline.items[0] = patchifier print 'defining features' V = T.matrix('V') mu = model.mu feat = triangle_code(V, mu) assert feat.dtype == 'float32' print 'compiling theano function' f = function([V], feat) nhid = model.mu.get_value().shape[0] if config.device.startswith('gpu') and nhid >= 4000: f = halver(f, model.nhid) topo_feat_var = T.TensorType(broadcastable=(False, False, False, False), dtype='float32')() if self.pool_mode == 'mean': region_features = function([topo_feat_var], topo_feat_var.mean(axis=(1, 2))) elif self.pool_mode == 'max': region_features = function([topo_feat_var], topo_feat_var.max(axis=(1, 2))) else: assert False def average_pool(stride): def point(p): return p * ns / stride rval = np.zeros( (topo_feat.shape[0], stride, stride, 
topo_feat.shape[3]), dtype='float32') for i in xrange(stride): for j in xrange(stride): rval[:, i, j, :] = region_features( topo_feat[:, point(i):point(i + 1), point(j):point(j + 1), :]) return rval output = np.zeros((num_examples, num_output_features), dtype='float32') fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'), view_converter=DefaultViewConverter( [1, 1, nhid])) ns = 32 - size + 1 depatchifier = ReassembleGridPatches(orig_shape=(ns, ns), patch_shape=(1, 1)) if len(range(0, num_examples - batch_size + 1, batch_size)) <= 0: print num_examples print batch_size for i in xrange(0, num_examples - batch_size + 1, batch_size): print i t1 = time.time() d = copy.copy(dataset) d.set_design_matrix(full_X[i:i + batch_size, :]) t2 = time.time() #print '\tapplying preprocessor' d.apply_preprocessor(pipeline, can_fit=False) X2 = d.get_design_matrix() t3 = time.time() #print '\trunning theano function' feat = f(X2) t4 = time.time() assert feat.dtype == 'float32' feat_dataset = copy.copy(fd) if np.any(np.isnan(feat)): nan += np.isnan(feat).sum() feat[np.isnan(feat)] = 0 feat_dataset.set_design_matrix(feat) #print '\treassembling features' feat_dataset.apply_preprocessor(depatchifier) #print '\tmaking topological view' topo_feat = feat_dataset.get_topological_view() assert topo_feat.shape[0] == batch_size t5 = time.time() #average pooling superpixels = average_pool(num_superpixels) assert batch_size == 1 if self.pool_mode == 'mean': for j in xrange(num_output_features): output[i:i + batch_size, j] = superpixels[:, top[j]:bottom[j] + 1, left[j]:right[j] + 1, idxs[j]].mean() elif self.pool_mode == 'max': for j in xrange(num_output_features): output[i:i + batch_size, j] = superpixels[:, top[j]:bottom[j] + 1, left[j]:right[j] + 1, idxs[j]].max() else: assert False assert output[i:i + batch_size, :].max() < 1e20 t6 = time.time() print(t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5) if self.chunk_size is not None: assert save_path.endswith('.npy') save_path_pieces = 
save_path.split('.npy') assert len(save_path_pieces) == 2 assert save_path_pieces[1] == '' save_path = save_path_pieces[0] + '_' + chr( ord('A') + self.chunk_id) + '.npy' np.save(save_path, output) if nan > 0: warnings.warn(str(nan) + ' features were nan')
def _execute(self): batch_size = self.batch_size feature_type = self.feature_type pooling_region_counts = self.pooling_region_counts dataset_family = self.dataset_family which_set = self.which_set model = self.model size = self.size nan = 0 dataset_descriptor = dataset_family[which_set][size] dataset = dataset_descriptor.dataset_maker() expected_num_examples = dataset_descriptor.num_examples full_X = dataset.get_design_matrix() assert full_X.dtype == 'float32' num_examples = full_X.shape[0] assert num_examples == expected_num_examples print 'restricting to examples from classes 0 and 1' full_X = full_X[dataset.y_fine < 2, :] #update for after restriction num_examples = full_X.shape[0] assert num_examples > 0 dataset.X = None dataset.design_loc = None dataset.compress = False patchifier = ExtractGridPatches(patch_shape=(size, size), patch_stride=(1, 1)) pipeline = serial.load(dataset_descriptor.pipeline_path) assert isinstance(pipeline.items[0], ExtractPatches) pipeline.items[0] = patchifier print 'defining features' V = T.matrix('V') assert V.type.dtype == 'float32' model.make_pseudoparams() d = model.infer(V=V) H = d['H_hat'] Mu1 = d['S_hat'] G = d['G_hat'] if len(G) != 1: raise NotImplementedError( "only supports two layer pd-dbms for now") G, = G assert H.dtype == 'float32' assert Mu1.dtype == 'float32' nfeat = model.s3c.nhid + model.dbm.rbms[0].nhid if self.feature_type == 'map_hs': feat = (H > 0.5) * Mu1 raise NotImplementedError("doesn't support layer 2") elif self.feature_type == 'map_h': feat = T.cast(H > 0.5, dtype='float32') raise NotImplementedError("doesn't support layer 2") elif self.feature_type == 'exp_hs': feat = H * Mu1 raise NotImplementedError("doesn't support layer 2") elif self.feature_type == 'exp_hs_split': Z = H * Mu1 pos = T.clip(Z, 0., 1e32) neg = T.clip(-Z, 0, 1e32) feat = T.concatenate((pos, neg), axis=1) nfeat *= 2 raise NotImplementedError("doesn't support layer 2") elif self.feature_type == 'exp_h,exp_g': feat = T.concatenate((H, G), 
axis=1) elif self.feature_type == 'exp_h_thresh': feat = H * (H > .01) raise NotImplementedError("doesn't support layer 2") else: raise NotImplementedError() assert feat.dtype == 'float32' print 'compiling theano function' f = function([V], feat) if config.device.startswith('gpu') and nfeat >= 4000: f = halver(f, nfeat) topo_feat_var = T.TensorType(broadcastable=(False, False, False, False), dtype='float32')() if self.pool_mode == 'mean': region_feat_var = topo_feat_var.mean(axis=(1, 2)) elif self.pool_mode == 'max': region_feat_var = topo_feat_var.max(axis=(1, 2)) else: raise ValueError("Unknown pool mode: " + self.pool_mode) region_features = function([topo_feat_var], region_feat_var) def average_pool(stride): def point(p): return p * ns / stride rval = np.zeros( (topo_feat.shape[0], stride, stride, topo_feat.shape[3]), dtype='float32') for i in xrange(stride): for j in xrange(stride): rval[:, i, j, :] = region_features( topo_feat[:, point(i):point(i + 1), point(j):point(j + 1), :]) return rval outputs = [ np.zeros((num_examples, count, count, nfeat), dtype='float32') for count in pooling_region_counts ] assert len(outputs) > 0 fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'), view_converter=DefaultViewConverter( [1, 1, nfeat])) ns = 32 - size + 1 depatchifier = ReassembleGridPatches(orig_shape=(ns, ns), patch_shape=(1, 1)) if len(range(0, num_examples - batch_size + 1, batch_size)) <= 0: print num_examples print batch_size for i in xrange(0, num_examples - batch_size + 1, batch_size): print i t1 = time.time() d = copy.copy(dataset) d.set_design_matrix(full_X[i:i + batch_size, :]) t2 = time.time() #print '\tapplying preprocessor' d.apply_preprocessor(pipeline, can_fit=False) X2 = np.cast['float32'](d.get_design_matrix()) t3 = time.time() #print '\trunning theano function' feat = f(X2) t4 = time.time() assert feat.dtype == 'float32' feat_dataset = copy.copy(fd) if np.any(np.isnan(feat)): nan += np.isnan(feat).sum() feat[np.isnan(feat)] = 0 
feat_dataset.set_design_matrix(feat) #print '\treassembling features' feat_dataset.apply_preprocessor(depatchifier) #print '\tmaking topological view' topo_feat = feat_dataset.get_topological_view() assert topo_feat.shape[0] == batch_size t5 = time.time() #average pooling for output, count in zip(outputs, pooling_region_counts): output[i:i + batch_size, ...] = average_pool(count) t6 = time.time() print(t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5) for output, save_path in zip(outputs, self.save_paths): np.save(save_path, output) if nan > 0: warnings.warn(str(nan) + ' features were nan')
def _execute(self): batch_size = self.batch_size pooling_region_counts = self.pooling_region_counts dataset_family = self.dataset_family which_set = self.which_set size = self.size nan = 0 dataset_descriptor = dataset_family[which_set][size] dataset = dataset_descriptor.dataset_maker() expected_num_examples = dataset_descriptor.num_examples full_X = dataset.get_design_matrix() num_examples = full_X.shape[0] assert num_examples == expected_num_examples if self.restrict is not None: assert self.restrict[1] <= full_X.shape[0] print 'restricting to examples ',self.restrict[0],' through ',self.restrict[1],' exclusive' full_X = full_X[self.restrict[0]:self.restrict[1],:] assert self.restrict[1] > self.restrict[0] #update for after restriction num_examples = full_X.shape[0] assert num_examples > 0 dataset.X = None dataset.design_loc = None dataset.compress = False patchifier = ExtractGridPatches( patch_shape = (size,size), patch_stride = (1,1) ) pipeline = serial.load(dataset_descriptor.pipeline_path) assert isinstance(pipeline.items[0], ExtractPatches) pipeline.items[0] = patchifier print 'defining features' Z = T.matrix('Z') if self.one_sided: feat = abs(Z) else: pos = T.clip(Z,0.,1e30) neg = T.clip(-Z,0.,1e30) feat = T.concatenate((pos, neg), axis=1) print 'compiling theano function' f = function([Z],feat) nfeat = self.W.shape[1] * (2 - self.one_sided) if not (nfeat == 1600 or nfeat == 3200): print nfeat assert False if config.device.startswith('gpu') and nfeat >= 4000: f = halver(f, nfeat) topo_feat_var = T.TensorType(broadcastable = (False,False,False,False), dtype='float32')() region_features = function([topo_feat_var], topo_feat_var.mean(axis=(1,2)) ) def average_pool( stride ): def point( p ): return p * ns / stride rval = np.zeros( (topo_feat.shape[0], stride, stride, topo_feat.shape[3] ) , dtype = 'float32') for i in xrange(stride): for j in xrange(stride): rval[:,i,j,:] = region_features( topo_feat[:,point(i):point(i+1), point(j):point(j+1),:] ) return rval 
outputs = [ np.zeros((num_examples,count,count,nfeat),dtype='float32') for count in pooling_region_counts ] assert len(outputs) > 0 fd = DenseDesignMatrix(X = np.zeros((1,1),dtype='float32'), view_converter = DefaultViewConverter([1, 1, nfeat] ) ) ns = 32 - size + 1 depatchifier = ReassembleGridPatches( orig_shape = (ns, ns), patch_shape=(1,1) ) if len(range(0,num_examples-batch_size+1,batch_size)) <= 0: print num_examples print batch_size for i in xrange(0,num_examples-batch_size+1,batch_size): print i t1 = time.time() d = copy.copy(dataset) d.set_design_matrix(full_X[i:i+batch_size,:]) t2 = time.time() #print '\tapplying preprocessor' d.apply_preprocessor(pipeline, can_fit = False) X2 = d.get_design_matrix() t3 = time.time() M.put(s,'batch',X2) M.eval(s, 'Z = sparse_codes(batch, dictionary, lambda)') Z = M.get(s, 'Z') feat = f(np.cast['float32'](Z)) t4 = time.time() assert feat.dtype == 'float32' feat_dataset = copy.copy(fd) if np.any(np.isnan(feat)): nan += np.isnan(feat).sum() feat[np.isnan(feat)] = 0 feat_dataset.set_design_matrix(feat) #print '\treassembling features' feat_dataset.apply_preprocessor(depatchifier) #print '\tmaking topological view' topo_feat = feat_dataset.get_topological_view() assert topo_feat.shape[0] == batch_size t5 = time.time() #average pooling for output, count in zip(outputs, pooling_region_counts): output[i:i+batch_size,...] = average_pool(count) t6 = time.time() print (t6-t1, t2-t1, t3-t2, t4-t3, t5-t4, t6-t5) for output, save_path in zip(outputs, self.save_paths): if self.chunk_size is not None: assert save_path.endswith('.npy') save_path_pieces = save_path.split('.npy') assert len(save_path_pieces) == 2 assert save_path_pieces[1] == '' save_path = save_path_pieces[0] + '_' + chr(ord('A')+self.chunk_id)+'.npy' np.save(save_path,output) if nan > 0: warnings.warn(str(nan)+' features were nan')