def base(train_dp, valid_dp, logger, learning_rate):
    # learning_rate = 0.01
    rng = numpy.random.RandomState([2016, 2, 26])
    max_epochs = 1000
    cost = CECost()
    stats = list()

    test_dp = deepcopy(valid_dp)
    train_dp.reset()
    valid_dp.reset()
    test_dp.reset()

    # NETWORK TOPOLOGY:
    model = MLP(cost=cost)
    model.add_layer(Relu(idim=125, odim=125, irange=1.6, rng=rng))
    model.add_layer(Softmax(idim=125, odim=19, rng=rng))

    # define the optimiser, here stochastic gradient descent
    # with fixed learning rate and max_epochs
    lr_scheduler = LearningRateFixed(
        learning_rate=learning_rate, max_epochs=max_epochs)
    optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)

    logger.info('Training started...')
    tr_stats_b, valid_stats_b = optimiser.train(model, train_dp, valid_dp)

    logger.info('Testing the model on test set:')
    tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
    logger.info('ACL test set accuracy is %.2f %%, cost (%s) is %.3f'
                % (tst_accuracy * 100., cost.get_name(), tst_cost))
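# A minimal sketch of how base() might be driven. It reuses the
# MACLDataProvider keyword arguments that appear later in this file; the
# 'train' dset value, the train-side batch size / randomize settings and the
# learning rate below are illustrative assumptions, not settings taken from
# the original experiments.
import logging
logging.basicConfig(level=logging.INFO)
example_logger = logging.getLogger('base_example')

example_train_dp = MACLDataProvider(dset='train', batch_size=100,
                                    max_num_batches=-10, randomize=True,
                                    fft=True, name='RLAx')
example_valid_dp = MACLDataProvider(dset='valid', batch_size=1140,
                                    max_num_batches=1, randomize=False,
                                    fft=True, name='RLAx')
base(example_train_dp, example_valid_dp, example_logger, learning_rate=0.01)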
""" num_inp_feat_maps, num_out_feat_maps, image_shape=(28, 28), kernel_shape=(5, 5), stride=(1, 1), irange=0.2, rng=None, conv_fwd=my_conv_fwd, conv_bck=my_conv_bck, conv_grad=my_conv_grad) """ tsk8_2_model.add_layer( ConvRelu(num_inp_feat_maps=1, num_out_feat_maps=5, image_shape=(28, 28), kernel_shape=(5, 5), stride=(1, 1), rng=rng)) tsk8_2_model.add_layer( ConvMaxPool2D(num_feat_maps=5, conv_shape=(24, 24), pool_shape=(2, 2), pool_stride=(2, 2))) #idim, odim, tsk8_2_model.add_layer(Relu(idim=5 * 12 * 12, odim=80, rng=rng)) tsk8_2_model.add_layer(Softmax(idim=80, odim=10, rng=rng)) #one can stack more layers here # define the optimiser, here stochasitc gradient descent
learning_rate = 0.07
max_epochs = 30
cost = CECost()
stats = list()

test_dp = deepcopy(valid_dp)
train_dp.reset()
valid_dp.reset()
test_dp.reset()

# define the model
model = MLP(cost=cost)
# model.add_layer(ComplexLinear(idim=125, odim=125, irange=1.6, rng=rng))
# model.add_layer(Sigmoid(idim=2*125, odim=125, irange=1.6, rng=rng))
model.add_layer(Sigmoid(idim=125, odim=125, irange=1.6, rng=rng))
model.add_layer(Softmax(idim=125, odim=19, rng=rng))

# define the optimiser, here stochastic gradient descent
# with fixed learning rate and max_epochs
lr_scheduler = LearningRateFixed(learning_rate=learning_rate,
                                 max_epochs=max_epochs)
optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)

logger.info('Training started...')
tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

logger.info('Testing the model on test set:')
tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'
            % (tst_accuracy * 100., cost.get_name(), tst_cost))
""" num_inp_feat_maps, num_out_feat_maps, image_shape=(28, 28), kernel_shape=(5, 5), stride=(1, 1), irange=0.2, rng=None, conv_fwd=my_conv_fwd, conv_bck=my_conv_bck, conv_grad=my_conv_grad) """ tsk8_1_model.add_layer( ConvSigmoid(num_inp_feat_maps=1, num_out_feat_maps=1, image_shape=(28, 28), kernel_shape=(5, 5), stride=(1, 1), rng=rng)) tsk8_1_model.add_layer( ConvMaxPool2D(num_feat_maps=1, conv_shape=(24, 24), pool_shape=(2, 2), pool_stride=(2, 2))) #idim, odim, tsk8_1_model.add_layer(Sigmoid(idim=12 * 12, odim=100, rng=rng)) tsk8_1_model.add_layer(Softmax(idim=100, odim=10, rng=rng)) #one can stack more layers here # define the optimiser, here stochasitc gradient descent
test_dp = MACLDataProvider(dset="valid",
                           batch_size=1140,
                           max_num_batches=1,
                           randomize=False,
                           fft=True,
                           name="RLAx")

for dt in pandas.date_range("2015-01-10", "2015-10-10"):
    print "date: " + str(dt)
    train_dp.reset()
    test_dp.reset()
    valid_dp.reset()
    rng = numpy.random.RandomState([dt.year, dt.month, dt.day])

    # define the model structure: a convolutional ReLU layer followed by a
    # sigmoid hidden layer and a softmax output, trained with cross-entropy cost
    cost = CECost()
    model = MLP(cost=cost)
    model.add_layer(ConvRelu_Opt(1, 1, rng=rng, stride=(1, 1)))
    model.add_layer(Sigmoid(idim=122, odim=122, rng=rng))
    model.add_layer(Softmax(idim=122, odim=19, rng=rng))
    # one can stack more layers here
    # print map(lambda x: (x.idim, x.odim), model.layers)

    lr_scheduler = LearningRateFixed(learning_rate=0.01, max_epochs=500)
    optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)
    tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)
    tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
    seeds.append((tr_stats, valid_stats, (tst_cost, tst_accuracy)))

end = time.time()
print "scipy.correlate time: " + str(end - start)
with open("seeds_conv_fft_feat.pkl", "wb") as f:
tsk8_1_1_model = MLP(cost=cost)
"""
num_inp_feat_maps, num_out_feat_maps,
image_shape=(28, 28), kernel_shape=(5, 5), stride=(1, 1),
irange=0.2, rng=None,
conv_fwd=my_conv_fwd, conv_bck=my_conv_bck, conv_grad=my_conv_grad)
"""
tsk8_1_1_model.add_layer(ConvSigmoid(num_inp_feat_maps=1,
                                     num_out_feat_maps=1,
                                     image_shape=(28, 28),
                                     kernel_shape=(10, 10),
                                     stride=(1, 1),
                                     rng=rng))
# idim, odim,
tsk8_1_1_model.add_layer(Relu(idim=1 * 19 * 19, odim=100, rng=rng))
tsk8_1_1_model.add_layer(Softmax(idim=100, odim=10, rng=rng))
# one can stack more layers here

# define the optimiser, here stochastic gradient descent
# with fixed learning rate and max_epochs as stopping criterion
lr_scheduler = LearningRateFixed(learning_rate=0.1, max_epochs=30)
optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)

logger.info('Initialising data providers...')
train_dp = MNISTDataProvider(dset='train', batch_size=100,
                             max_num_batches=-10, randomize=True,
                             conv_reshape=True)
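# The flattened idim values of the fully connected layers above follow from
# the conv/pool output shapes: a 'valid' convolution of a 28x28 image with a
# 5x5 kernel at stride 1 gives 24x24, a 2x2 max-pool with stride 2 halves
# that to 12x12 (hence 5 * 12 * 12 and 12 * 12), and a 10x10 kernel gives
# 19x19 (hence 1 * 19 * 19). A small sketch of that arithmetic, assuming
# 'valid' convolutions and non-overlapping pooling as in the layers above;
# the helper names are illustrative, not part of the code base.
def conv_output_shape(image_shape, kernel_shape, stride=(1, 1)):
    # output size of a 'valid' convolution: (I - K) // S + 1 per dimension
    return tuple((i - k) // s + 1
                 for i, k, s in zip(image_shape, kernel_shape, stride))

def pool_output_shape(conv_shape, pool_shape, pool_stride):
    # output size of a max-pool: (C - P) // S + 1 per dimension
    return tuple((c - p) // s + 1
                 for c, p, s in zip(conv_shape, pool_shape, pool_stride))

assert conv_output_shape((28, 28), (5, 5)) == (24, 24)
assert pool_output_shape((24, 24), (2, 2), (2, 2)) == (12, 12)
assert conv_output_shape((28, 28), (10, 10)) == (19, 19)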
def spretrain(self, model, train_iterator, valid_iterator=None, noise=False):
    self.noise_stack = [model.rng.binomial(1, 0.25,
                                           (train_iterator.batch_size, f.odim))
                        for f in model.layers]
    converged = False
    tr_stats, valid_stats = [], []

    cost = MSECost()
    model_out = MLP(cost=cost)
    init_layer = Linear(idim=model.layers[0].idim,
                        odim=model.layers[0].idim * 2,
                        rng=model.rng)
    model_out.add_layer(init_layer)
    nl_layer = Sigmoid(idim=model.layers[0].idim * 2,
                       odim=model.layers[0].idim,
                       rng=model.rng)
    model_out.add_layer(nl_layer)
    output_layer = Linear(idim=model.layers[0].idim,
                          odim=model.layers[0].idim,
                          rng=model.rng)
    model_out.add_layer(output_layer)

    # do the initial validation
    train_iterator.reset()
    train_iterator_tmp = self.label_switch(train_iterator)
    tr_nll, tr_acc = self.validate(
        model_out, train_iterator_tmp, self.l1_weight, self.l2_weight)
    logger.info('Epoch %i: SpecPreTraining cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
                % (self.lr_scheduler.epoch, model_out.cost.get_name(),
                   tr_nll, tr_acc * 100.))
    tr_stats.append((tr_nll, tr_acc))

    layers = model.layers
    layers_out = list()
    train_iterator.reset()
    fprop_list = self.fft_label_switch(deepcopy(train_iterator))
    # print fprop_list

    while not converged:
        train_iterator.reset()
        tstart = time.clock()
        tr_nll, tr_acc = self.spretrain_epoch(model=model_out,
                                              train_iterator=train_iterator,
                                              learning_rate=self.lr_scheduler.get_rate(),
                                              to_layer=0,
                                              fprop_list=fprop_list)
        tstop = time.clock()
        tr_stats.append((tr_nll, tr_acc))
        logger.info('Epoch %i: PreTraining cost (%s) is %.3f. Accuracy is %.2f%%'
                    % (self.lr_scheduler.epoch + 1, model_out.cost.get_name(),
                       tr_nll, tr_acc * 100.))
        self.lr_scheduler.get_next_rate(None)
        vstop = time.clock()
        train_speed = train_iterator.num_examples_presented() / (tstop - tstart)
        tot_time = vstop - tstart
        converged = (self.lr_scheduler.get_rate() == 0)

    # resetting epochs to zero; could have just done lr_scheduler.epoch = 0,
    # but I focused most of my time on cleaning up the conv code
    return model_out, tr_stats, valid_stats
def pretrain_discriminative(self, model, train_iterator, valid_iterator=None):
    converged = False
    cost_name = model.cost.get_name()
    tr_stats, valid_stats = [], []
    layer_num = len(model.layers)

    cost = CECost()
    model_out = MLP(cost=cost)
    init_layer = model.layers[0]
    layers = model.layers
    model_out.add_layer(init_layer)
    # build an output layer of the same type as the original top layer
    out_layer = layers_dict[layers[-1].get_name()](idim=init_layer.odim,
                                                   odim=layers[-1].odim,
                                                   rng=model.rng,
                                                   irange=layers[-1].irange)
    model_out.add_layer(out_layer)

    # do the initial validation
    train_iterator.reset()
    tr_nll, tr_acc = self.validate(
        model_out, train_iterator, self.l1_weight, self.l2_weight)
    logger.info('Epoch %i: Training cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
                % (self.lr_scheduler.epoch, cost_name, tr_nll, tr_acc * 100.))
    tr_stats.append((tr_nll, tr_acc))

    if valid_iterator is not None:
        valid_iterator.reset()
        valid_nll, valid_acc = self.validate(
            model, valid_iterator, self.l1_weight, self.l2_weight)
        logger.info('Epoch %i: Validation cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
                    % (self.lr_scheduler.epoch, cost_name, valid_nll, valid_acc * 100.))
        valid_stats.append((valid_nll, valid_acc))

    for to_layer in range(len(layers)):
        if to_layer > 0 and len(layers) > 2 and to_layer < len(layers) - 1:
            model_out.remove_top_layer()
            model_out.layers[len(model_out.layers) - 1].odim = layers[to_layer].idim
            tmp_layer = copy(layers[to_layer])
            model_out.add_layer(tmp_layer)
            # This re-creates the output layer so that it can have a different
            # dimensionality from the hidden layer below it: the weight matrix
            # needs to be reshaped and re-instantiated. The code in layers.py
            # was modified (via its global variables) to allow this; I believe
            # this case may have been overlooked in the original code base.
            model_out.add_layer(layers_dict[layers[-1].get_name()](idim=tmp_layer.odim,
                                                                   odim=layers[-1].odim,
                                                                   rng=model.rng,
                                                                   irange=layers[-1].irange))

        while not converged:
            train_iterator.reset()
            tstart = time.clock()
            tr_nll, tr_acc = self.pretrain_discriminative_epoch(
                model=model_out,
                train_iterator=train_iterator,
                learning_rate=self.lr_scheduler.get_rate(),
                to_layer=to_layer)
            tstop = time.clock()
            tr_stats.append((tr_nll, tr_acc))
            logger.info('Epoch %i: PreTraining cost (%s) is %.3f. Accuracy is %.2f%%'
                        % (self.lr_scheduler.epoch + 1, cost_name, tr_nll, tr_acc * 100.))

            vstart = time.clock()
            if valid_iterator is not None:
                valid_iterator.reset()
                valid_nll, valid_acc = self.validate(model, valid_iterator,
                                                     self.l1_weight, self.l2_weight)
                logger.info('Epoch %i: PreValidation cost (%s) is %.3f. Accuracy is %.2f%%'
                            % (self.lr_scheduler.epoch + 1, cost_name,
                               valid_nll, valid_acc * 100.))
                self.lr_scheduler.get_next_rate(valid_acc)
                valid_stats.append((valid_nll, valid_acc))
            else:
                self.lr_scheduler.get_next_rate(None)
            vstop = time.clock()

            train_speed = train_iterator.num_examples_presented() / (tstop - tstart)
            valid_speed = valid_iterator.num_examples_presented() / (vstop - vstart)
            tot_time = vstop - tstart
            # pps = presentations per second
            logger.info("Epoch %i: Took %.0f seconds. PreTraining speed %.0f pps. "
                        "Validation speed %.0f pps."
                        % (self.lr_scheduler.epoch, tot_time, train_speed, valid_speed))

            # we stop training when the learning rate, as returned by the lr
            # scheduler, is 0; this is implementation dependent and, depending
            # on the schedule, could happen e.g. when max_epochs has been
            # reached or when the progress between two consecutive epochs is
            # too small
            converged = (self.lr_scheduler.get_rate() == 0)

        # restore the cached schedulers before moving on to the next layer
        self.lr_scheduler = copy(self.cache_l)
        self.dp_scheduler = copy(self.cache_d)
        converged = False

    return model_out, tr_stats, valid_stats
def pretrain(self, model, train_iterator, valid_iterator=None, noise=False):
    """
    Returns the pretrained layers rather than a full model; I was wary of
    returning the model itself and ran out of time when it came to cleaning
    this code up. The code base was great, but working around it here and
    there was time consuming.
    """
    # Whilst the slides say not to noise the learned representations when
    # building a denoising autoencoder, noising each learned representation
    # makes sense when inductively applying the definition of a single-unit
    # autoencoder. Nonetheless I did it the way the slides point out; my
    # version can still be run by passing wrong=True to fprop.
    self.noise_stack = [model.rng.binomial(1, 0.25,
                                           (train_iterator.batch_size, f.odim))
                        for f in model.layers]
    converged = False
    cost_name = model.cost.get_name()
    tr_stats, valid_stats = [], []

    cost = MSECost()
    model_out = MLP(cost=cost)
    init_layer = Sigmoid(idim=model.layers[0].idim,
                         odim=model.layers[0].odim,
                         rng=model.rng)
    model_out.add_layer(init_layer)
    output_layer = Linear(idim=init_layer.odim, odim=125, rng=init_layer.rng)
    model_out.add_layer(output_layer)

    # do the initial validation
    train_iterator.reset()
    train_iterator_tmp = self.label_switch(train_iterator)
    tr_nll, tr_acc = self.validate(
        model_out, train_iterator_tmp, self.l1_weight, self.l2_weight)
    logger.info('Epoch %i: PreTraining cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
                % (self.lr_scheduler.epoch, cost_name, tr_nll, tr_acc * 100.))
    tr_stats.append((tr_nll, tr_acc))

    if valid_iterator is not None:
        valid_iterator.reset()
        valid_iterator_tmp = self.label_switch(valid_iterator)
        valid_nll, valid_acc = self.validate(
            model_out, valid_iterator_tmp, self.l1_weight, self.l2_weight)
        logger.info('Epoch %i: PreValidation cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
                    % (self.lr_scheduler.epoch, cost_name, valid_nll, valid_acc * 100.))
        valid_stats.append((valid_nll, valid_acc))

    layers = model.layers
    layers_out = list()
    fprop_list = None
    print len(layers)
    final = False
    noise_layer = -1
    if noise:
        noise_layer = 0

    for to_layer in range(len(layers)):
        # This is very ugly, yes, but I invested my time in the conv code
        if to_layer > 0 and len(layers) > 2 and to_layer < len(layers) - 1:
            train_iterator.reset()
            model_out.remove_top_layer()
            fprop_list = self.fprop_label_switch(train_iterator, model_out)
            if noise:
                noise_layer = to_layer
            tmp_layer = Sigmoid(idim=model_out.layers[len(model_out.layers) - 1].odim,
                                odim=layers[to_layer].odim,
                                rng=init_layer.rng)
            model_out.add_layer(tmp_layer)
            output_layer = Linear(idim=tmp_layer.odim,
                                  odim=tmp_layer.idim,
                                  rng=init_layer.rng)
            model_out.add_layer(output_layer)
        elif to_layer == len(layers) - 1:
            final = True
            train_iterator.reset()
            model_out.remove_top_layer()
            fprop_list = None
            output_layer = layers[-1]
            model_out.add_layer(output_layer)
            model_out.cost = CECost()
            noise_layer = -1

        while not converged:
            train_iterator.reset()
            tstart = time.clock()
            tr_nll, tr_acc = self.pretrain_epoch(model=model_out,
                                                 train_iterator=train_iterator,
                                                 learning_rate=self.lr_scheduler.get_rate(),
                                                 to_layer=to_layer,
                                                 fprop_list=fprop_list,
                                                 final=final,
                                                 noise_up_layer=noise_layer)
            tstop = time.clock()
            tr_stats.append((tr_nll, tr_acc))
            logger.info('Epoch %i: PreTraining cost (%s) is %.3f. Accuracy is %.2f%%'
                        % (self.lr_scheduler.epoch + 1, cost_name, tr_nll, tr_acc * 100.))

            vstart = time.clock()
            if valid_iterator is not None:
                valid_iterator.reset()
                if fprop_list is not None:
                    valid_iterator_tmp = fprop_list
                elif not final:
                    valid_iterator_tmp = self.label_switch(valid_iterator)
                else:
                    valid_iterator_tmp = valid_iterator
                valid_nll, valid_acc = self.validate(model_out, valid_iterator_tmp,
                                                     self.l1_weight, self.l2_weight)
                logger.info('Epoch %i: PreValidation cost (%s) is %.3f. Accuracy is %.2f%%'
                            % (self.lr_scheduler.epoch + 1, cost_name,
                               valid_nll, valid_acc * 100.))
                self.lr_scheduler.get_next_rate(valid_acc)
                valid_stats.append((valid_nll, valid_acc))
            else:
                self.lr_scheduler.get_next_rate(None)
            vstop = time.clock()

            train_speed = train_iterator.num_examples_presented() / (tstop - tstart)
            valid_speed = valid_iterator.num_examples_presented() / (vstop - vstart)
            tot_time = vstop - tstart
            # pps = presentations per second
            logger.info("Epoch %i: Took %.0f seconds. PreTraining speed %.0f pps. "
                        "Validation speed %.0f pps."
                        % (self.lr_scheduler.epoch, tot_time, train_speed, valid_speed))
            converged = (self.lr_scheduler.get_rate() == 0)

        # reset the epoch counters; could have just done lr_scheduler.epoch = 0,
        # but I focused most of my time on cleaning up the conv code
        self.lr_scheduler.epoch = 0
        if self.dp_scheduler is not None:
            self.dp_scheduler.epoch = 0
        converged = False
        layers_out.append(model_out.layers[to_layer])

    return layers_out, tr_stats, valid_stats
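# A minimal sketch of how the layers returned by pretrain() might be stacked
# back into a model and fine-tuned end to end. The helper name and the
# learning-rate / max_epochs defaults are illustrative assumptions; only MLP,
# CECost, LearningRateFixed, SGDOptimiser, add_layer and train come from the
# code base used above.
def finetune_from_pretrained(pretrained_layers, train_dp, valid_dp,
                             learning_rate=0.01, max_epochs=30):
    # rebuild a cross-entropy MLP from the greedily pretrained layers
    finetune_model = MLP(cost=CECost())
    for layer in pretrained_layers:
        finetune_model.add_layer(layer)
    # fine-tune the whole stack with plain SGD and a fixed learning rate
    lr_scheduler = LearningRateFixed(learning_rate=learning_rate,
                                     max_epochs=max_epochs)
    optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)
    return optimiser.train(finetune_model, train_dp, valid_dp)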