def test_correct_sequential_slices():
    iterator = SequentialSubsetIterator(10, 3, 4)
    sl = iterator.next()
    assert sl.start == 0
    assert sl.stop == 3
    assert sl.step is None
    sl = iterator.next()
    assert sl.start == 3
    assert sl.stop == 6
    assert sl.step is None
    sl = iterator.next()
    assert sl.start == 6
    assert sl.stop == 9
    assert sl.step is None
    sl = iterator.next()
    assert sl.start == 9
    assert sl.stop == 10
    assert sl.step is None
def test_misc_exceptions():
    raised = False
    try:
        SubsetIterator.__new__(SubsetIterator).next()
    except NotImplementedError:
        raised = True
    assert raised
    raised = False
    try:
        SubsetIterator(1, 2, 3)
    except NotImplementedError:
        raised = True
    assert raised
    raised = False
    try:
        SequentialSubsetIterator(10, 3, 3, rng=0)
    except ValueError:
        raised = True
    assert raised
def test_sequential_num_batches_and_batch_size():
    try:
        # This should be fine, we have enough examples for 4 batches
        # (with one under-sized batch).
        iterator = SequentialSubsetIterator(10, 3, 4)
        for i in range(4):
            iterator.next()
    except Exception:
        assert False
    raised = False
    try:
        iterator.next()
    except StopIteration:
        raised = True
    assert raised
    try:
        # This should be fine, we have enough examples for 3 batches
        # (with one example to spare).
        iterator = SequentialSubsetIterator(10, 3, 3)
        for i in range(3):
            iterator.next()
    except Exception:
        assert False
    raised = False
    try:
        iterator.next()
    except StopIteration:
        raised = True
    assert raised
    try:
        # This should fail, since you can't make 5 batches of 3 from 10.
        iterator = SequentialSubsetIterator(10, 3, 5)
    except ValueError:
        return
    assert False
class SparseDataset(Dataset):
    """
    SparseDataset is by itself an iterator.
    """

    def __init__(self, load_path=None, from_scipy_sparse_dataset=None,
                 zipped_npy=True):
        self.load_path = load_path
        if self.load_path is not None:
            if zipped_npy:
                print '... loading sparse data set from a zipped npy file'
                self.sparse_matrix = scipy.sparse.csr_matrix(
                    numpy.load(gzip.open(load_path)), dtype=floatX)
            else:
                print '... loading sparse data set from a npy file'
                self.sparse_matrix = scipy.sparse.csr_matrix(
                    numpy.load(load_path).item(), dtype=floatX)
        else:
            print '... building from the given sparse dataset'
            self.sparse_matrix = from_scipy_sparse_dataset
        self.data_n_rows = self.sparse_matrix.shape[0]
        self.num_examples = self.data_n_rows

    def get_design_matrix(self):
        return self.sparse_matrix

    def get_batch_design(self, batch_size, include_labels=False):
        """
        Method inherited from Dataset.
        """
        self.iterator(mode='sequential', batch_size=batch_size,
                      num_batches=None, topo=None)
        return self.next()

    def get_batch_topo(self, batch_size):
        """
        Method inherited from Dataset.
        """
        raise NotImplementedError('Not implemented for sparse dataset')

    def iterator(self, mode=None, batch_size=None, num_batches=None,
                 topo=None, targets=None, rng=None):
        """
        Method inherited from Dataset.
        """
        self.mode = mode
        self.batch_size = batch_size
        self._targets = targets
        if mode == 'sequential':
            self.subset_iterator = SequentialSubsetIterator(
                self.data_n_rows, batch_size, num_batches, rng=None)
            return self
        else:
            raise NotImplementedError(
                'other iteration schemes are not supported for now!')

    def __iter__(self):
        return self

    def next(self):
        indx = self.subset_iterator.next()
        try:
            mini_batch = self.sparse_matrix[indx]
        except IndexError:
            # The minibatch index goes beyond the dataset boundary;
            # drop into the debugger to inspect the offending slice.
            import ipdb
            ipdb.set_trace()
        return mini_batch
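
# Usage sketch (not part of the original code): a minimal example of treating
# SparseDataset as its own iterator, built from an in-memory scipy.sparse
# matrix.  The identity matrix and the batch_size/num_batches values are made
# up for illustration; only the 'sequential' mode implemented above is
# exercised.
def _example_sparse_dataset_usage():
    import numpy
    import scipy.sparse
    # Hypothetical data: a 10x10 sparse identity matrix.
    mat = scipy.sparse.csr_matrix(numpy.eye(10, dtype='float32'))
    dataset = SparseDataset(from_scipy_sparse_dataset=mat)
    it = dataset.iterator(mode='sequential', batch_size=2, num_batches=5)
    first_batch = it.next()  # a csr_matrix holding rows 0 and 1
    return first_batch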
class CroppedPatchesDataset(Dataset):
    """
    CroppedPatchesDataset is by itself an iterator.
    """

    def __init__(self, img_shape, iter_mode="fprop", h5_file=None,
                 start=None, stop=None, mode=None):
        self.__dict__.update(locals())
        self.img_shape = img_shape
        if self.self is not None:
            del self.self

        if mode is not None:
            self.mode = mode
        elif start is not None or stop is not None:
            self.mode = "r+"
        else:
            self.mode = "r"

        if not os.path.isfile(h5_file):
            raise ValueError("Please enter a valid file path.")

        self.initialize_dataset(h5_file)

    def initialize_dataset(self, h5_file):
        """
        Open the h5 file and set up the patches, targets, image numbers
        and patch locations.
        """
        self.h5file = tables.openFile(h5_file, mode=self.mode)
        self.dataset = self.h5file.root
        self.X = self.dataset.Data.Pt
        self.Y = self.dataset.Data.Tgt
        self.imgnos = self.dataset.Data.Ino
        self.plocs = self.dataset.Data.Ploc
        self.data_n_rows = self.Y.shape[0]

    def set_iter_mode(self, r_mode):
        self.iter_mode = r_mode

    def get_design_matrix(self):
        """
        Return the patches as a dense design matrix.
        """
        return self.X

    def get_batch_design(self, batch_size, include_labels=False):
        """
        Method inherited from the Dataset.
        """
        self.iterator(mode='sequential', batch_size=batch_size,
                      num_batches=None, topo=None)
        return self.next()

    def get_batch_topo(self, batch_size):
        """
        Method inherited from the Dataset.
        """
        raise NotImplementedError('Not implemented for CroppedPatchesDataset')

    def iterator(self, mode=None, batch_size=None, num_batches=None,
                 topo=None, targets=None, rng=None):
        """
        Method inherited from the Dataset.
        """
        self.mode = mode
        self.batch_size = batch_size
        self._targets = targets
        self.cur_idx = -1
        if mode == 'sequential':
            self.subset_iterator = SequentialSubsetIterator(
                self.data_n_rows, batch_size=1, num_batches=num_batches,
                rng=None)
            return self
        else:
            raise NotImplementedError(
                'other iteration schemes are not supported for now!')

    def __iter__(self):
        return self

    def next(self):
        """
        Return the next minibatch: the patches (with their targets, image
        numbers and patch locations) that belong to the next image.
        """
        if self.cur_idx == -1:
            batch_start_indx = self.subset_iterator.next()
        else:
            batch_start_indx = self.cur_idx

        beginning_img_no = self.imgnos[batch_start_indx.start]
        mini_batch_patches = []
        mini_batch_plocs = []
        mini_batch_imgnos = []
        mini_batch_targets = []

        indx = batch_start_indx
        while indx is not None:
            try:
                current_img_no = self.imgnos[indx.start]
                # Stop collecting once we reach a patch from a different
                # image; remember where it starts so the next call to
                # next() resumes from there.
                if mini_batch_imgnos and current_img_no != beginning_img_no:
                    self.cur_idx = indx
                    break
                mini_batch_patches.append(self.X[indx.start])
                mini_batch_targets.append(self.Y[indx.start])
                mini_batch_imgnos.append(current_img_no)
                mini_batch_plocs.append(self.plocs[indx.start])
            except IndexError:
                print "The index of the minibatch goes beyond the boundary."
                import ipdb
                ipdb.set_trace()
            indx = self.subset_iterator.next()

        if self.iter_mode == "train":
            return (mini_batch_patches, mini_batch_targets)
        else:
            return (mini_batch_patches, mini_batch_targets,
                    mini_batch_imgnos, mini_batch_plocs)
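
# Usage sketch (not part of the original code): iterating over a
# CroppedPatchesDataset stored in a hypothetical "patches.h5" file whose
# /Data group provides the Pt, Tgt, Ino and Ploc arrays read by
# initialize_dataset above.  The file name, img_shape and iter_mode values
# are illustrative assumptions.
def _example_cropped_patches_usage(h5_path="patches.h5"):
    dataset = CroppedPatchesDataset(img_shape=(32, 32),
                                    iter_mode="fprop",
                                    h5_file=h5_path)
    it = dataset.iterator(mode='sequential', batch_size=1, num_batches=None)
    # In "fprop" mode, next() returns the patches, targets, image numbers
    # and patch locations for all patches belonging to the first image.
    patches, targets, imgnos, plocs = it.next()
    return patches, targets, imgnos, plocs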