def __init__(self, train_path, valid_path=None, test_path=None, train_size=None,
             cached=False, post_processing=None, shuffle_memory=False, curriculum=None):
    """Configure the dataset, optionally caching training data in memory.

    :param train_path: path to the stream-pickled training data
    :param valid_path: optional path to validation data
    :param test_path: optional path to test data
    :param train_size: number of training samples; overwritten with the
        cached-data length when ``cached`` is True
    :param cached: if True, load the whole training set into memory up front
    :param post_processing: callable applied to every cached sample
        (identity when None)
    :param shuffle_memory: shuffle the cached data once after loading
    :param curriculum: optional callable for curriculum learning; requires
        ``cached=True``
    :raises Exception: if ``curriculum`` is not callable, or is given
        without ``cached=True``
    """
    self._train_path = train_path
    self._valid_path = valid_path
    self._test_path = test_path
    self._train_size = train_size
    self._cache_on_memory = cached
    self._cached_train_data = None
    # Fall back to the identity function so the attribute is always callable.
    self._post_processing = post_processing if post_processing else lambda x: x
    self._shuffle_memory = shuffle_memory
    self._curriculum = curriculum
    self._curriculum_count = 0
    if curriculum and not callable(curriculum):
        raise Exception("curriculum function must be callable")
    if curriculum and not cached:
        raise Exception(
            "curriculum learning needs training data to be cached")
    if self._cache_on_memory:
        logging.info("Cache on memory")
        # Use a context manager so the data file is closed after loading;
        # the original left the handle open (resource leak).  list() fully
        # consumes the StreamPickler stream before the file closes.
        with open(self._train_path) as train_file:
            self._cached_train_data = list(
                map(self._post_processing, StreamPickler.load(train_file)))
        self._train_size = len(self._cached_train_data)
        if self._shuffle_memory:
            logging.info("Shuffle on-memory data")
            global_rand.shuffle(self._cached_train_data)
def __init__(self, train_path, valid_path=None, test_path=None, train_size=None,
             cached=False, post_processing=None, shuffle_memory=False):
    """Configure the dataset, optionally caching training data in memory.

    :param train_path: path to the stream-pickled training data
    :param valid_path: optional path to validation data
    :param test_path: optional path to test data
    :param train_size: number of training samples (may be None)
    :param cached: if True, load the whole training set into memory up front
    :param post_processing: callable applied to every cached sample
        (identity when None)
    :param shuffle_memory: shuffle the cached data once after loading
    """
    self._train_path = train_path
    self._valid_path = valid_path
    self._test_path = test_path
    self._train_size = train_size
    self._cache_on_memory = cached
    self._cached_train_data = None
    # Fall back to the identity function so the attribute is always callable.
    self._post_processing = post_processing if post_processing else lambda x: x
    self._shuffle_memory = shuffle_memory
    if self._cache_on_memory:
        logging.info("Cache on memory")
        # Use a context manager so the data file is closed after loading;
        # the original left the handle open (resource leak).  list() fully
        # consumes the StreamPickler stream before the file closes.
        with open(self._train_path) as train_file:
            self._cached_train_data = list(
                map(self._post_processing, StreamPickler.load(train_file)))
        if self._shuffle_memory:
            logging.info("Shuffle on-memory data")
            global_rand.shuffle(self._cached_train_data)
def _yield_data(self, subset):
    """Shuffle *subset*, distribute the sequences over ``self.size``
    length-balanced stacks, zero-pad them, and yield aligned (x, y)
    fragment pieces.

    :param subset: iterable of (x, y) sequence pairs
    """
    subset = list(subset)
    global_rand.shuffle(subset)
    bunch_stack_x = [[] for _ in range(self.size)]
    bunch_stack_y = [[] for _ in range(self.size)]
    for x, y in subset:
        # Greedy balancing: always extend the currently shortest stack.
        # BUGFIX: the original did ``stack_lens = map(len, ...)`` and then
        # called ``stack_lens.index(min(stack_lens))`` — on Python 3 a map
        # object has no .index() and min() exhausts the iterator, so this
        # crashed.  Materialize the lengths as a list first.
        stack_lens = [len(stack) for stack in bunch_stack_x]
        shortest_i = stack_lens.index(min(stack_lens))
        bunch_stack_x[shortest_i].extend(x)
        bunch_stack_y[shortest_i].extend(y)
    self._pad_zeros(bunch_stack_x)
    self._pad_zeros(bunch_stack_y)
    pieces_x = self._cut_to_pieces(bunch_stack_x)
    pieces_y = self._cut_to_pieces(bunch_stack_y)
    logging.info("%d pieces this time" % int(float(len(bunch_stack_x[0])) / self.fragment_length))
    for piece in zip(pieces_x, pieces_y):
        yield piece
def __init__(self, train_path, valid_path=None, test_path=None, train_size=None,
             cached=False, post_processing=None, shuffle_memory=False, curriculum=None):
    """Configure the dataset, optionally caching training data in memory.

    :param train_path: path to the stream-pickled training data
    :param valid_path: optional path to validation data
    :param test_path: optional path to test data
    :param train_size: number of training samples (may be None)
    :param cached: if True, load the whole training set into memory up front
    :param post_processing: callable applied to every cached sample
        (identity when None)
    :param shuffle_memory: shuffle the cached data once after loading
    :param curriculum: optional callable for curriculum learning; requires
        ``cached=True``
    :raises Exception: if ``curriculum`` is not callable, or is given
        without ``cached=True``
    """
    self._train_path = train_path
    self._valid_path = valid_path
    self._test_path = test_path
    self._train_size = train_size
    self._cache_on_memory = cached
    self._cached_train_data = None
    # Fall back to the identity function so the attribute is always callable.
    self._post_processing = post_processing if post_processing else lambda x: x
    self._shuffle_memory = shuffle_memory
    self._curriculum = curriculum
    self._curriculum_count = 0
    if curriculum and not callable(curriculum):
        raise Exception("curriculum function must be callable")
    if curriculum and not cached:
        raise Exception("curriculum learning needs training data to be cached")
    if self._cache_on_memory:
        logging.info("Cache on memory")
        # Use a context manager so the data file is closed after loading;
        # the original left the handle open (resource leak).  list() fully
        # consumes the StreamPickler stream before the file closes.
        with open(self._train_path) as train_file:
            self._cached_train_data = list(
                map(self._post_processing, StreamPickler.load(train_file)))
        if self._shuffle_memory:
            logging.info("Shuffle on-memory data")
            global_rand.shuffle(self._cached_train_data)
def __init__(self, train_path, valid_path=None, test_path=None, train_size=None,
             cache_on_memory=False, post_processing=None, shuffle_memory=False):
    """Configure the dataset, optionally caching training data in memory.

    :param train_path: path to the stream-pickled training data
    :param valid_path: optional path to validation data
    :param test_path: optional path to test data
    :param train_size: number of training samples (may be None)
    :param cache_on_memory: if True, load the whole training set into
        memory up front
    :param post_processing: callable applied to every cached sample
        (identity when None)
    :param shuffle_memory: shuffle the cached data once after loading
    """
    self._train_path = train_path
    self._valid_path = valid_path
    self._test_path = test_path
    self._train_size = train_size
    self._cache_on_memory = cache_on_memory
    self._cached_train_data = None
    # Fall back to the identity function so the attribute is always callable.
    self._post_processing = post_processing if post_processing else lambda x: x
    self._shuffle_memory = shuffle_memory
    if self._cache_on_memory:
        logging.info("Cache on memory")
        # Use a context manager so the data file is closed after loading;
        # the original left the handle open (resource leak).  list() fully
        # consumes the StreamPickler stream before the file closes.
        with open(self._train_path) as train_file:
            self._cached_train_data = list(
                map(self._post_processing, StreamPickler.load(train_file)))
        if self._shuffle_memory:
            logging.info("Shuffle on-memory data")
            global_rand.shuffle(self._cached_train_data)
def __init__(self, train_path, valid_path=None, test_path=None, train_size=None,
             cached=False, post_processing=None, shuffle_memory=False, data_processor=None):
    """Configure the dataset, optionally caching training data in memory.

    :param train_path: path to the stream-pickled training data
    :param valid_path: optional path to validation data
    :param test_path: optional path to test data
    :param train_size: number of training samples; overwritten with the
        cached-data length when ``cached`` is True
    :param cached: if True, load the whole training set into memory up front
    :param post_processing: callable applied to every cached sample
        (identity when None)
    :param shuffle_memory: shuffle the cached data once after loading
    :type data_processor: DataProcessor
    :raises Exception: if ``data_processor`` is not a DataProcessor instance
    """
    self._train_path = train_path
    self._valid_path = valid_path
    self._test_path = test_path
    self._train_size = train_size
    self._cache_on_memory = cached
    self._cached_train_data = None
    # Fall back to the identity function so the attribute is always callable.
    self._post_processing = post_processing if post_processing else lambda x: x
    self._shuffle_memory = shuffle_memory
    self._epoch = 0
    self._data_processor = data_processor
    if data_processor and not isinstance(data_processor, DataProcessor):
        raise Exception("data_processor must be an instance of DataProcessor.")
    if self._cache_on_memory:
        logging.info("Cache on memory")
        # Use a context manager so the data file is closed after loading;
        # the original left the handle open (resource leak).  list() fully
        # consumes the StreamPickler stream before the file closes.
        with open(self._train_path) as train_file:
            self._cached_train_data = list(
                map(self._post_processing, StreamPickler.load(train_file)))
        self._train_size = len(self._cached_train_data)
        if self._shuffle_memory:
            logging.info("Shuffle on-memory data")
            global_rand.shuffle(self._cached_train_data)
# Default location of the pre-trained model bundled next to this script.
default_model = os.path.join(os.path.dirname(__file__), "models", "mlp_distortion1.gz")

mnist = MnistDataset()

logging.info("transforming images with elastic distortion")
# Augment the training set: keep every original image and add one
# elastically-distorted copy, doubling the number of samples.
expanded_train_set = []
for img, label in mnist.train_set():
    expanded_train_set.append((img, label))
    # Scale pixel values back up by 256 and restore the 28x28 layout that
    # elastic_distortion operates on; flatten and rescale afterwards.
    # NOTE(review): assumes img is a flat array of normalized pixels — confirm
    # against MnistDataset.
    original_img = (img * 256).reshape((28, 28))
    transformed_img = (elastic_distortion(original_img) / 256).flatten()
    expanded_train_set.append((transformed_img, label))
global_rand.shuffle(expanded_train_set)
expanded_mnist = BasicDataset(train=expanded_train_set, valid=mnist.valid_set(), test=mnist.test_set())
logging.info("expanded training data size: %d" % len(expanded_train_set))

if __name__ == '__main__':
    # Three-layer MLP classifier for 28x28 MNIST digit images.
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(256, 'relu'),
                Dense(256, 'relu'),
                Dense(10, 'linear'),
                Softmax())
    trainer = MomentumTrainer(model)
    annealer = LearningRateAnnealer()
    # NOTE(review): this chunk appears truncated — the trainer and annealer
    # are created but training is never started here; confirm the remainder
    # of the script elsewhere.