# LSTM language model over a Chinese corpus with a full-vocabulary softmax output.
# This fragment assumes the usual imports of the full script (os, ArgumentParser,
# theano.tensor as T, and the deepy / LM classes used below).
default_model = os.path.join(os.path.dirname(__file__), "models", "lstm_rnnlmnew.gz")
default_dict = '/home/tangyaohua/dl4mt/data/larger.corpus/vocab.chinese.pkl'
# default_dict = '/home/tangyh/Dropbox/PycharmProjects/dl4mt/session2/lm/resources/vocab.chinese.pkl'

if __name__ == '__main__':
    ap = ArgumentParser()
    ap.add_argument("--model", default='')
    ap.add_argument("--dictpath", default=default_dict)
    ap.add_argument("--small", action="store_true")
    args = ap.parse_args()

    vocab, lmdata = load_datagivendict(dictpath=args.dictpath, small=args.small,
                                       history_len=5, batch_size=16)
    inputx = T.imatrix('x')
    print len(vocab), 'len(vocab)'

    model = NeuralLM(len(vocab), test_data=None, input_tensor=inputx)
    model.stack(LSTM(hidden_size=100, output_type="sequence",
                     persistent_state=True, batch_size=lmdata.size,
                     reset_state_for_input=0),
                FullOutputLayer(len(vocab)))

    if os.path.exists(args.model):
        model.load_params(args.model)

    trainer = SGDTrainer(model, {"learning_rate": LearningRateAnnealer.learning_rate(1.2),
                                 "weight_l2": 1e-7})
    annealer = LearningRateAnnealer(trainer)

    trainer.run(lmdata, controllers=[annealer])

    model.save_params(default_model)
ap.add_argument("--model", default="") ap.add_argument("--small", action="store_true") args = ap.parse_args() vocab, lmdata = load_data(small=args.small, history_len=5, batch_size=64) import pdb pdb.set_trace() model = NeuralLM(vocab.size) model.stack( RNN(hidden_size=100, output_type="sequence", hidden_activation='sigmoid', persistent_state=True, batch_size=lmdata.size, reset_state_for_input=0), ClassOutputLayer(output_size=100, class_size=100)) if os.path.exists(args.model): model.load_params(args.model) trainer = SGDTrainer( model, { "learning_rate": LearningRateAnnealer.learning_rate(1.2), "weight_l2": 1e-7 }) annealer = LearningRateAnnealer() trainer.run(lmdata, epoch_controllers=[annealer]) model.save_params(default_model)
# MNIST classifier built from stacked highway layers with batch normalization and
# dropout. Here `T` is the number of highway layers (network depth); `n`, `d`, the
# dropout rates, `init`, `gate_bias`, and `activation` are hyperparameters defined
# earlier in the full script.
model.stack(Dropout(p=dropout_p_0),
            Dense(n, init=init, disable_bias=True),
            BatchNormalization(),
            Activation(activation))
# model.stack(Dropout(p=dropout_p_0), BatchNormalization())

for _ in range(T):
    # model.stack(HighwayLayerLRDropoutBatchNorm(activation=activation, gate_bias=gate_bias,
    #                                            projection_dim=d, d_p_0=dropout_p_h_0,
    #                                            d_p_1=dropout_p_h_1, init=init))
    model.stack(HighwayLayerLRDiagDropoutBatchNorm(activation=activation, gate_bias=gate_bias,
                                                   projection_dim=d,
                                                   d_p_0=dropout_p_h_0, d_p_1=dropout_p_h_1,
                                                   init=init, quasi_ortho_init=True))

# model.stack(BatchNormalization(), Dropout(p=dropout_p_2), Dense(10, init=init))
model.stack(Dropout(p=dropout_p_2), Dense(10, init=init))

learning_rate_start = 3e-3
# learning_rate_target = 3e-7
# learning_rate_epochs = 100
# learning_rate_decay = (learning_rate_target / learning_rate_start) ** (1.0 / learning_rate_epochs)

conf = TrainerConfig()
conf.learning_rate = LearningRateAnnealer.learning_rate(learning_rate_start)
# conf.gradient_clipping = 1
conf.patience = 20
# conf.gradient_tolerance = 5
conf.avoid_nan = True
conf.min_improvement = 1e-10

# trainer = MomentumTrainer(model)
trainer = AdamTrainer(model, conf)

mnist = MiniBatches(MnistDataset(), batch_size=100)
# mnist = MiniBatches(MnistDatasetSmallValid(), batch_size=100)

# trainer.run(mnist, controllers=[IncrementalLearningRateAnnealer(trainer, 0, learning_rate_decay)])
trainer.run(mnist, controllers=[LearningRateAnnealer(trainer, 3, 14)])

logging.info('Setting best parameters for testing.')
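# The commented-out schedule above would decay the learning rate geometrically
# from learning_rate_start to learning_rate_target over learning_rate_epochs
# epochs. A standalone sketch of that arithmetic (values copied from the
# commented lines; how IncrementalLearningRateAnnealer consumes the factor is
# not shown here):
learning_rate_start = 3e-3
learning_rate_target = 3e-7
learning_rate_epochs = 100

# start * decay ** epochs == target  =>  decay = (target / start) ** (1 / epochs)
learning_rate_decay = (learning_rate_target / learning_rate_start) ** (1.0 / learning_rate_epochs)
print("per-epoch decay factor: %.4f" % learning_rate_decay)   # ~0.9120
print("rate after %d epochs: %.2e"
      % (learning_rate_epochs, learning_rate_start * learning_rate_decay ** learning_rate_epochs))  # ~3e-07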
ap.add_argument("--disable_backprop", default=False) ap.add_argument("--disable_reinforce", default=False) ap.add_argument("--random_glimpse", default=False) args = ap.parse_args() mnist = MiniBatches((MnistDataset()), batch_size=1) model_path = args.model network = get_network(model_path, std=args.variance, disable_reinforce=args.disable_reinforce, random_glimpse=args.random_glimpse) trainer_conf = TrainerConfig() trainer_conf.learning_rate = LearningRateAnnealer.learning_rate( args.learning_rate) trainer_conf.weight_l2 = 0.0001 trainer_conf.hidden_l2 = 0.0001 trainer_conf.method = args.method trainer = FirstGlimpseTrainer(network, network.layers[0], config=trainer_conf) annealer = LearningRateAnnealer(trainer, patience=5) timer = Timer() for _ in trainer.train(mnist.train_set(), mnist.valid_set(), mnist.test_set()): if annealer.invoke(): break
# LSTM language model with a full softmax over the vocabulary.
from layers import FullOutputLayer

logging.basicConfig(level=logging.INFO)

default_model = os.path.join(os.path.dirname(__file__), "models", "lstm_rnnlm.gz")

if __name__ == '__main__':
    ap = ArgumentParser()
    ap.add_argument("--model", default="")
    ap.add_argument("--small", action="store_true")
    args = ap.parse_args()

    vocab, lmdata = load_data(small=args.small, history_len=5, batch_size=64)

    model = NeuralLM(vocab.size)
    model.stack(LSTM(hidden_size=100, output_type="sequence",
                     persistent_state=True, batch_size=lmdata.size,
                     reset_state_for_input=0),
                FullOutputLayer(vocab.size))

    if os.path.exists(args.model):
        model.load_params(args.model)

    trainer = SGDTrainer(model, {"learning_rate": LearningRateAnnealer.learning_rate(1.2),
                                 "weight_l2": 1e-7})
    annealer = LearningRateAnnealer(trainer)

    trainer.run(lmdata, controllers=[annealer])

    model.save_params(default_model)
import logging, os

logging.basicConfig(level=logging.INFO)

# MNIST Multi-layer model with dropout.
from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax, Dropout
from deepy.trainers import MomentumTrainer, LearningRateAnnealer

model_path = os.path.join(os.path.dirname(__file__), "models", "tutorial1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(256, 'relu'),
                Dropout(0.2),
                Dense(256, 'relu'),
                Dropout(0.2),
                Dense(10, 'linear'),
                Softmax())

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    trainer = MomentumTrainer(model, {"learning_rate": LearningRateAnnealer.learning_rate(0.01)})
    annealer = LearningRateAnnealer(trainer)

    trainer.run(mnist, controllers=[annealer])

    model.save_params(model_path)
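# Once training finishes, the parameters live in models/tutorial1.gz. A later
# script can rebuild the identical stack and restore them with load_params(),
# the same call the other snippets use to resume from a checkpoint. This is
# only a sketch of that reuse pattern, assuming the imports above are in scope:
model = NeuralClassifier(input_dim=28 * 28)
model.stack(Dense(256, 'relu'),
            Dropout(0.2),
            Dense(256, 'relu'),
            Dropout(0.2),
            Dense(10, 'linear'),
            Softmax())
model.load_params(model_path)  # restores the weights written by save_params above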
# Adding-problem regression with an IRNN (recurrent weights initialized to the
# identity matrix); `batch_set` is the adding-problem data built earlier in the
# full script.
if __name__ == '__main__':
    ap = ArgumentParser()
    ap.add_argument("--model", default=os.path.join(os.path.dirname(__file__),
                                                    "models", "sequence_adding_100_2.gz"))
    args = ap.parse_args()

    model = NeuralRegressor(input_dim=2, input_tensor=3)
    model.stack(IRNN(hidden_size=100, input_type="sequence", output_type="one"),
                Dense(1))

    if os.path.exists(args.model):
        model.load_params(args.model)

    conf = TrainerConfig()
    conf.learning_rate = LearningRateAnnealer.learning_rate(0.01)
    conf.gradient_clipping = 3
    conf.patience = 50
    conf.gradient_tolerance = 5
    conf.avoid_nan = False

    trainer = SGDTrainer(model, conf)
    annealer = LearningRateAnnealer(patience=20)

    trainer.run(batch_set, controllers=[annealer])

    model.save_params(args.model)

    print "Identity matrix weight:"
    print model.first_layer().W_h.get_value().diagonal()
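# The final print statements read off the diagonal of the recurrent weight
# matrix W_h: an IRNN starts that matrix as the identity, so the diagonal is
# all ones before training and the printout shows how far learning has moved
# it. A minimal numpy illustration of the starting point (only the name W_h is
# taken from the snippet; the rest is illustrative, not deepy API):
import numpy as np

hidden_size = 100
W_h = np.eye(hidden_size, dtype="float32")  # identity init: each unit initially copies itself
print(W_h.diagonal()[:5])                   # [1. 1. 1. 1. 1.] before any training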
ap.add_argument("--learning_rate", default=0.01) ap.add_argument("--variance", default=0.03) ap.add_argument("--disable_backprop", default=False) ap.add_argument("--disable_reinforce", default=False) ap.add_argument("--random_glimpse", default=False) args = ap.parse_args() mnist = MiniBatches((MnistDataset()), batch_size=1) model_path = args.model network = get_network(model_path, std=args.variance, disable_reinforce=args.disable_reinforce, random_glimpse=args.random_glimpse) trainer_conf = TrainerConfig() trainer_conf.learning_rate = LearningRateAnnealer.learning_rate(args.learning_rate) trainer_conf.weight_l2 = 0.0001 trainer_conf.hidden_l2 = 0.0001 trainer_conf.method = args.method trainer = FirstGlimpseTrainer(network, network.layers[0], config=trainer_conf) annealer = LearningRateAnnealer(trainer, patience=5) timer = Timer() for _ in trainer.train(mnist.train_set(), mnist.valid_set(), mnist.test_set()): if annealer.invoke(): break timer.end() network.save_params(model_path)
# DRAW generative model on binarized MNIST, trained with Adam (or AdaGrad when
# fine-tuning from a pre-trained model).
if __name__ == '__main__':
    from argparse import ArgumentParser
    ap = ArgumentParser()
    ap.add_argument("--load", default="", help="pre-trained model path")
    ap.add_argument("--finetune", action="store_true")
    args = ap.parse_args()

    model = DrawModel(image_width=28, image_height=28, attention_times=64)

    if args.load:
        model.load_params(args.load)

    conf = {"gradient_clipping": 10,
            "learning_rate": LearningRateAnnealer.learning_rate(0.004),
            "weight_l2": 0}
    # conf.avoid_nan = True
    # from deepy import DETECT_NAN_MODE
    # conf.theano_mode = DETECT_NAN_MODE
    # TODO: Find out the problem causing NaN

    if args.finetune:
        trainer = FineTuningAdaGradTrainer(model, conf)
    else:
        trainer = AdamTrainer(model, conf)

    mnist = MiniBatches(BinarizedMnistDataset(), batch_size=100)

    trainer.run(mnist, controllers=[])
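# The commented-out lines above record an unresolved NaN problem. Independently
# of deepy's DETECT_NAN_MODE, Theano's NanGuardMode can be attached to a compiled
# function so it fails at the first NaN/Inf and localizes where it enters the
# graph. A self-contained sketch of that tool on a toy log() graph (not the DRAW
# model itself):
import numpy as np
import theano
import theano.tensor as T
from theano.compile.nanguardmode import NanGuardMode

x = T.vector("x")
y = T.log(x)  # yields -inf / NaN for non-positive inputs

f = theano.function([x], y,
                    mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True))
print(f(np.array([1.0, 2.0], dtype=theano.config.floatX)))   # fine
# f(np.array([0.0], dtype=theano.config.floatX))             # would raise inside NanGuardMode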