def run(method, model_path):
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(128, 'relu'),
                Dense(128, 'relu'),
                Dense(10, 'linear'),
                Softmax())
    trainer = ScipyTrainer(model, method)
    annealer = LearningRateAnnealer()
    mnist = MiniBatches(MnistDataset(), batch_size=100)
    trainer.run(mnist, epoch_controllers=[annealer])
    model.save_params(model_path)
def run(initializer, model_path):
    model = NeuralClassifier(input_dim=28 * 28)
    for _ in range(6):
        model.stack(Dense(128, 'relu', init=initializer))
    model.stack(Dense(10, 'linear'), Softmax())
    trainer = MomentumTrainer(model)
    annealer = LearningRateAnnealer(trainer)
    mnist = MiniBatches(MnistDataset(), batch_size=20)
    trainer.run(mnist, controllers=[annealer])
    model.save_params(model_path)
def _initialize_impl(self, X, y=None):
    assert not self.is_initialized,\
        "This neural network has already been initialized."

    self._create_specs(X, y)
    self._create_mlp()
    if y is None:
        return

    if self.valid_size > 0.0:
        assert self.valid_set is None, "Can't specify valid_size and valid_set together."
        X, X_v, y, y_v = sklearn.cross_validation.train_test_split(
            X, y,
            test_size=self.valid_size,
            random_state=self.random_state)
        self.valid_set = X_v, y_v
    self.train_set = X, y

    self.trainer = MomentumTrainer(self.mlp)
    self.controllers = [
        self,
        LearningRateAnnealer(self.trainer, patience=self.n_stable, anneal_times=0)]
"""
Classify MNIST digits using a very deep thin network.
Plain deep networks are very hard to train, as this case shows.
Note, however, that if highway layers merely learned to pass information
forward (that is, to act as transparent layers), they would be meaningless.
"""

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax
from deepy.trainers import MomentumTrainer, LearningRateAnnealer

model_path = os.path.join(os.path.dirname(__file__), "models", "baseline1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    for _ in range(20):
        model.stack(Dense(71, 'relu'))
    model.stack(Dense(10, 'linear'), Softmax())

    trainer = MomentumTrainer(model)
    mnist = MiniBatches(MnistDataset(), batch_size=20)
    trainer.run(mnist, controllers=[LearningRateAnnealer()])
    model.save_params(model_path)
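For context, the gating rule that lets a highway layer either transform its input or simply carry it through can be sketched in a few lines of numpy. This is an illustrative sketch of the general formula y = T(x) * H(x) + (1 - T(x)) * x with hypothetical helper names, not deepy's HighwayLayer API; it only shows why a layer whose transform gate stays near zero is effectively transparent, the degenerate behaviour the docstring above warns about.

# Minimal numpy sketch of highway gating (illustrative only; names are hypothetical,
# not deepy's HighwayLayer implementation). The carry path requires the layer's
# output dimension to equal its input dimension, so W_h and W_t are square.
import numpy as np

def highway_forward(x, W_h, b_h, W_t, b_t):
    h = np.maximum(0.0, np.dot(x, W_h) + b_h)           # candidate transform H(x), ReLU
    t = 1.0 / (1.0 + np.exp(-(np.dot(x, W_t) + b_t)))   # transform gate T(x), sigmoid
    return t * h + (1.0 - t) * x                        # gate near 0 => layer just copies x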
# -*- coding: utf-8 -*-

import os

from deepy.networks import AutoEncoder
from deepy.layers import RNN, Dense
from deepy.trainers import SGDTrainer, LearningRateAnnealer
from util import get_data, VECTOR_SIZE, SEQUENCE_LENGTH

HIDDEN_SIZE = 50

model_path = os.path.join(os.path.dirname(__file__), "models", "rnn1.gz")

if __name__ == '__main__':
    model = AutoEncoder(rep_dim=10, input_dim=VECTOR_SIZE, input_tensor=3)
    model.stack_encoders(
        RNN(hidden_size=HIDDEN_SIZE, input_type="sequence", output_type="one"))
    model.stack_decoders(
        RNN(hidden_size=HIDDEN_SIZE, input_type="one", output_type="sequence", steps=SEQUENCE_LENGTH),
        Dense(VECTOR_SIZE, 'softmax'))

    trainer = SGDTrainer(model)
    annealer = LearningRateAnnealer(trainer)
    trainer.run(get_data(), controllers=[annealer])
    model.save_params(model_path)
default_model = os.path.join(os.path.dirname(__file__), "models", "lstm_rnnlmnew.gz") default_dict = '/home/tangyaohua/dl4mt/data/larger.corpus/vocab.chinese.pkl' # default_dict = '/home/tangyh/Dropbox/PycharmProjects/dl4mt/session2/lm/resources/vocab.chinese.pkl' if __name__ == '__main__': ap = ArgumentParser() ap.add_argument("--model", default='') ap.add_argument("--dictpath", default=default_dict) ap.add_argument("--small", action="store_true") args = ap.parse_args() vocab, lmdata = load_datagivendict(dictpath=args.dictpath, small=args.small, history_len=5, batch_size=16) inputx=T.imatrix('x') print len(vocab), 'len(vocab)' model = NeuralLM(len(vocab), test_data=None, input_tensor=inputx) model.stack(LSTM(hidden_size=100, output_type="sequence", persistent_state=True, batch_size=lmdata.size, reset_state_for_input=0), FullOutputLayer(len(vocab))) if os.path.exists(args.model): model.load_params(args.model) trainer = SGDTrainer(model, {"learning_rate": LearningRateAnnealer.learning_rate(1.2), "weight_l2": 1e-7}) annealer = LearningRateAnnealer(trainer) trainer.run(lmdata, controllers=[annealer]) model.save_params(default_model)
ap.add_argument("--disable_backprop", default=False) ap.add_argument("--disable_reinforce", default=False) ap.add_argument("--random_glimpse", default=False) args = ap.parse_args() mnist = MiniBatches((MnistDataset()), batch_size=1) model_path = args.model network = get_network(model_path, std=args.variance, disable_reinforce=args.disable_reinforce, random_glimpse=args.random_glimpse) trainer_conf = TrainerConfig() trainer_conf.learning_rate = LearningRateAnnealer.learning_rate( args.learning_rate) trainer_conf.weight_l2 = 0.0001 trainer_conf.hidden_l2 = 0.0001 trainer_conf.method = args.method trainer = FirstGlimpseTrainer(network, network.layers[0], config=trainer_conf) annealer = LearningRateAnnealer(trainer, patience=5) timer = Timer() for _ in trainer.train(mnist.train_set(), mnist.valid_set(), mnist.test_set()): if annealer.invoke(): break
model.stack(Dropout(p=dropout_p_0),
            Dense(n, init=init, disable_bias=True),
            BatchNormalization(),
            Activation(activation))
#model.stack(Dropout(p=dropout_p_0), BatchNormalization())

for _ in range(T):
    #model.stack(HighwayLayerLRDropoutBatchNorm(activation=activation, gate_bias=gate_bias, projection_dim=d, d_p_0=dropout_p_h_0, d_p_1=dropout_p_h_1, init=init))
    model.stack(HighwayLayerLRDiagDropoutBatchNorm(activation=activation, gate_bias=gate_bias,
                                                   projection_dim=d, d_p_0=dropout_p_h_0,
                                                   d_p_1=dropout_p_h_1, init=init,
                                                   quasi_ortho_init=True))

#model.stack(BatchNormalization(), Dropout(p=dropout_p_2), Dense(10, init=init))
model.stack(Dropout(p=dropout_p_2), Dense(10, init=init))

learning_rate_start = 3e-3
#learning_rate_target = 3e-7
#learning_rate_epochs = 100
#learning_rate_decay = (learning_rate_target / learning_rate_start) ** (1.0 / learning_rate_epochs)

conf = TrainerConfig()
conf.learning_rate = LearningRateAnnealer.learning_rate(learning_rate_start)
#conf.gradient_clipping = 1
conf.patience = 20
#conf.gradient_tolerance = 5
conf.avoid_nan = True
conf.min_improvement = 1e-10

#trainer = MomentumTrainer(model)
trainer = AdamTrainer(model, conf)

mnist = MiniBatches(MnistDataset(), batch_size=100)
#mnist = MiniBatches(MnistDatasetSmallValid(), batch_size=100)

#trainer.run(mnist, controllers=[IncrementalLearningRateAnnealer(trainer, 0, learning_rate_decay)])
trainer.run(mnist, controllers=[LearningRateAnnealer(trainer, 3, 14)])

logging.info('Setting best parameters for testing.')
if __name__ == '__main__':
    from argparse import ArgumentParser
    ap = ArgumentParser()
    ap.add_argument("--load", default="", help="pre-trained model path")
    ap.add_argument("--finetune", action="store_true")
    args = ap.parse_args()

    model = DrawModel(image_width=28, image_height=28, attention_times=64)

    if args.load:
        model.load_params(args.load)

    conf = {"gradient_clipping": 10,
            "learning_rate": LearningRateAnnealer.learning_rate(0.004),
            "weight_l2": 0}
    # conf.avoid_nan = True
    # from deepy import DETECT_NAN_MODE
    # conf.theano_mode = DETECT_NAN_MODE
    # TODO: Find out the problem causing NaN

    if args.finetune:
        trainer = FineTuningAdaGradTrainer(model, conf)
    else:
        trainer = AdamTrainer(model, conf)

    mnist = MiniBatches(BinarizedMnistDataset(), batch_size=100)

    trainer.run(mnist, controllers=[])
from layers import FullOutputLayer

logging.basicConfig(level=logging.INFO)

default_model = os.path.join(os.path.dirname(__file__), "models", "lstm_rnnlm.gz")

if __name__ == '__main__':
    ap = ArgumentParser()
    ap.add_argument("--model", default="")
    ap.add_argument("--small", action="store_true")
    args = ap.parse_args()

    vocab, lmdata = load_data(small=args.small, history_len=5, batch_size=64)
    model = NeuralLM(vocab.size)
    model.stack(LSTM(hidden_size=100, output_type="sequence",
                     persistent_state=True, batch_size=lmdata.size,
                     reset_state_for_input=0),
                FullOutputLayer(vocab.size))

    if os.path.exists(args.model):
        model.load_params(args.model)

    trainer = SGDTrainer(model, {"learning_rate": LearningRateAnnealer.learning_rate(1.2),
                                 "weight_l2": 1e-7})
    annealer = LearningRateAnnealer(trainer)

    trainer.run(lmdata, controllers=[annealer])
    model.save_params(default_model)
if __name__ == '__main__':
    ap = ArgumentParser()
    ap.add_argument("--model", default=os.path.join(os.path.dirname(__file__), "models", "sequence_adding_100_2.gz"))
    args = ap.parse_args()

    model = NeuralRegressor(input_dim=2, input_tensor=3)
    model.stack(IRNN(hidden_size=100, input_type="sequence", output_type="one"),
                Dense(1))

    if os.path.exists(args.model):
        model.load_params(args.model)

    conf = TrainerConfig()
    conf.learning_rate = LearningRateAnnealer.learning_rate(0.01)
    conf.gradient_clipping = 3
    conf.patience = 50
    conf.gradient_tolerance = 5
    conf.avoid_nan = False

    trainer = SGDTrainer(model, conf)
    annealer = LearningRateAnnealer(patience=20)

    trainer.run(batch_set, controllers=[annealer])
    model.save_params(args.model)

    print "Identity matrix weight:"
    print model.first_layer().W_h.get_value().diagonal()
ap.add_argument("--learning_rate", default=0.01) ap.add_argument("--variance", default=0.03) ap.add_argument("--disable_backprop", default=False) ap.add_argument("--disable_reinforce", default=False) ap.add_argument("--random_glimpse", default=False) args = ap.parse_args() mnist = MiniBatches((MnistDataset()), batch_size=1) model_path = args.model network = get_network(model_path, std=args.variance, disable_reinforce=args.disable_reinforce, random_glimpse=args.random_glimpse) trainer_conf = TrainerConfig() trainer_conf.learning_rate = LearningRateAnnealer.learning_rate(args.learning_rate) trainer_conf.weight_l2 = 0.0001 trainer_conf.hidden_l2 = 0.0001 trainer_conf.method = args.method trainer = FirstGlimpseTrainer(network, network.layers[0], config=trainer_conf) annealer = LearningRateAnnealer(trainer, patience=5) timer = Timer() for _ in trainer.train(mnist.train_set(), mnist.valid_set(), mnist.test_set()): if annealer.invoke(): break timer.end() network.save_params(model_path)
import logging, os
logging.basicConfig(level=logging.INFO)

# MNIST multi-layer model with dropout.
from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax, Dropout
from deepy.trainers import MomentumTrainer, LearningRateAnnealer

model_path = os.path.join(os.path.dirname(__file__), "models", "tutorial1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(256, 'relu'),
                Dropout(0.2),
                Dense(256, 'relu'),
                Dropout(0.2),
                Dense(10, 'linear'),
                Softmax())

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    trainer = MomentumTrainer(model, {"learning_rate": LearningRateAnnealer.learning_rate(0.01)})
    annealer = LearningRateAnnealer(trainer)

    trainer.run(mnist, controllers=[annealer])
    model.save_params(model_path)
default_model = os.path.join(os.path.dirname(__file__), "models", "deep_conv.gz") if __name__ == '__main__': model = NeuralClassifier(input_dim=28 * 28) model.stack( # Reshape to 3D tensor Reshape((-1, 28, 28)), # Add a new dimension for convolution DimShuffle((0, 'x', 1, 2)), Convolution((4, 1, 5, 5), activation="relu"), Dropout(0.15), Convolution((8, 4, 5, 5), activation="relu"), Dropout(0.1), Convolution((16, 8, 3, 3), activation="relu"), Flatten(), Dropout(0.1), # As dimension information was lost, reveal it to the pipe line RevealDimension(16), Dense(10, 'linear'), Softmax()) trainer = MomentumTrainer(model) annealer = LearningRateAnnealer() mnist = MiniBatches(MnistDataset(), batch_size=20) trainer.run(mnist, controllers=[annealer]) model.save_params(default_model)
ap.add_argument("--model", default="") ap.add_argument("--small", action="store_true") args = ap.parse_args() vocab, lmdata = load_data(small=args.small, history_len=5, batch_size=64) import pdb pdb.set_trace() model = NeuralLM(vocab.size) model.stack( RNN(hidden_size=100, output_type="sequence", hidden_activation='sigmoid', persistent_state=True, batch_size=lmdata.size, reset_state_for_input=0), ClassOutputLayer(output_size=100, class_size=100)) if os.path.exists(args.model): model.load_params(args.model) trainer = SGDTrainer( model, { "learning_rate": LearningRateAnnealer.learning_rate(1.2), "weight_l2": 1e-7 }) annealer = LearningRateAnnealer() trainer.run(lmdata, epoch_controllers=[annealer]) model.save_params(default_model)