# Assumes the usual imports from the surrounding file: mxnet as mx, gluon, gluon.nn as nn,
# nnvm, tvm, tvm.rpc as rpc, tvm.contrib.graph_runtime as runtime, plus the project's
# utils module and quantized layers (Convolution_int, Activation_int, Dense).
def main():
    ctx = mx.cpu()
    train_data, test_data = utils.load_data_mnist(batch_size=64)

    net = nn.HybridSequential()
    with net.name_scope():
        net.add(
            Convolution_int(acti_bit=8, channels=64, kernel_size=(3, 3), strides=(1, 1),
                            padding=(1, 1), in_channels=1, use_bias=False),
            Activation_int(8, 'relu'),
            Convolution_int(acti_bit=8, channels=128, kernel_size=(3, 3), strides=(1, 1),
                            padding=(1, 1), in_channels=64, use_bias=False),
            Activation_int(8, 'relu'),
            Dense(units=10, acti_bit=8, in_units=100352),
        )
    net.load_params('./mnist_quantize_bias_dense_conv.params',
                    allow_missing=True, ignore_extra=True, ctx=ctx)

    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    batch_size = 64
    trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': 0.001})
    #utils.train(train_data, test_data, net, loss, trainer, ctx=ctx, num_epochs=10)

    # If you do not need to convert the network to a sym-module, comment out the next line.
    net.hybridize()
    #test_acc = utils.evaluate_accuracy(test_data, net, ctx)
    #net.save_params('./mnist_quantize_bias_dense_conv.params')
    #print('test acc : ', test_acc)
    #net.export('mnist_quantize')
    #net.save_params('./mnist_quantize.params')

    # added
    sym, params = nnvm.frontend.from_mxnet(net)
    graph, lib, params = nnvm.compiler.build(sym, 'llvm',
                                             shape={'data': (1, 784)}, params=params)
    remote = rpc.LocalSession()
    remote_ctx = remote.gpu(0)
    module = runtime.create(graph, lib, ctx=remote_ctx)
    # note: train_data is a gluon DataLoader; see the sketch below for feeding a real sample
    module.set_input('data', train_data[0])
    module.run()
    # 10-way output of the final Dense layer
    output = module.get_output(0, tvm.nd.empty((1, 10), dtype='float32'))
    print(output)
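
# A hedged sketch (not in the original) of feeding an actual sample to the compiled module:
# the converted parameters also need to be loaded into the runtime, and the input must match
# the (1, 784) shape declared to nnvm.compiler.build above. Whether the converted quantized
# graph really accepts a flattened input depends on how the custom layers were translated.
def run_one_sample(module, params, test_data):
    module.set_input(**params)        # load the converted weights into the runtime
    for data, label in test_data:     # take the first batch from the gluon DataLoader
        sample = data[0].reshape((1, 784)).asnumpy()
        break
    module.set_input('data', tvm.nd.array(sample.astype('float32')))
    module.run()
    return module.get_output(0, tvm.nd.empty((1, 10), dtype='float32')).asnumpy()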
def main():
    ctx = mx.cpu()
    train_data, test_data = utils.load_data_mnist(batch_size=64)

    net = nn.HybridSequential()
    with net.name_scope():
        net.add(
            Dense(units=10, acti_bit=8, in_units=100352),
        )
    net.initialize()

    batch_size = 64
    #utils.train(train_data, test_data, net, loss, trainer, ctx=ctx, num_epochs=10)

    # If you do not need to convert the network to a sym-module, comment out the next line.
    net.hybridize()
    #test_acc = utils.evaluate_accuracy(test_data, net, ctx)
    #net.save_params('./mnist_quantize_bias_dense_conv.params')
    #print('test acc : ', test_acc)
    #net.export('mnist_quantize')
    #net.save_params('./mnist_quantize.params')
    print(net(mx.symbol.Variable('data')).tojson())

    # added
    sym, params = nnvm.frontend.from_mxnet(net)
def test_mnist(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
               batch_size=128, n_hidden=500, n_hiddenLayers=3,
               normalization=True, eps=1e-4, verbose=False, smaller_set=True,
               loss='norm', lr_decay=False, binary=True):
    """
    Wrapper function for training and testing MLP

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the number of
        units in each hidden layer, and its length should equal n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: whether to print out an epoch summary.

    :type smaller_set: boolean
    :param smaller_set: whether to use the smaller dataset.

    :type loss: string
    :param loss: whether to use hinge loss or the normal (negative log-likelihood) loss.

    :type lr_decay: boolean
    :param lr_decay: whether to use learning-rate decay.

    :type binary: boolean
    :param binary: whether to binarize the output.

    :type normalization: boolean
    :param normalization: whether to normalize the output.

    :type eps: float
    :param eps: normalization variable.
    """
    # load the dataset; download the dataset if it is not present
    datasets = load_data_mnist(theano_shared=True)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    train_data_y_mat = datasets[3]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()   # index to a [mini]batch
    x = T.matrix('x')     # the data is presented as rasterized images
    y = T.ivector('y')    # the labels are presented as a 1D vector of [int] labels
    y_mat = T.matrix('y_mat')
    epoch = T.lscalar('epoch')

    rng = numpy.random.RandomState(1234)

    # construct a neural network, either MLP or CNN.
    classifier = myMLP(rng=rng, input=x, n_in=28 * 28, n_hidden=n_hidden,
                       n_hiddenLayers=n_hiddenLayers, n_out=10, binary=binary,
                       normalization=normalization, eps=eps)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); the cost is expressed
    # here symbolically.
    # The loss can be chosen as either the hinge loss or the NLL loss.
    if loss == 'norm':
        cost = (classifier.negative_log_likelihood(y)
                + L1_reg * classifier.L1
                + L2_reg * classifier.L2_sqr)
    else:
        cost = classifier.logRegressionLayer.hinge(y_mat)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
        })

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in the list gparams.
    # According to the paper, the gradient is calculated using the binarized
    # weights, since the same weights are used during forward propagation.
    if binary:
        gparams = [T.grad(cost, param) for param in classifier.params_bin]
    else:
        gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    # The learning rate can change depending on the input: we follow exponential decay,
    # learning_rate = lr_start * (lr_final / lr_start) ** (epoch / n_epochs)
    # (the decay horizon is hard-coded to 25 epochs below).
    if lr_decay:
        lr_start = learning_rate
        lr_final = 1e-6
        updates = [
            (param_i,
             T.cast(
                 T.clip(
                     param_i - (lr_start * (lr_final / lr_start) ** (epoch / 25)) * grad_i,
                     -1, 1),
                 theano.config.floatX))
            for param_i, grad_i in zip(classifier.params, gparams)
        ]
    else:
        updates = [(param_i,
                    T.cast(T.clip(param_i - learning_rate * grad_i, -1, 1),
                           theano.config.floatX))
                   for param_i, grad_i in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost and, at the
    # same time, updates the parameters of the model based on the rules defined
    # in `updates`.
    # Depending on lr_decay and the chosen loss, we need to pass different
    # inputs and givens to the training function.
    if loss == 'norm':
        if lr_decay:
            train_model = theano.function(
                inputs=[index, epoch],
                outputs=cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batch_size:(index + 1) * batch_size],
                    y: train_set_y[index * batch_size:(index + 1) * batch_size],
                })
        else:
            train_model = theano.function(
                inputs=[index],
                outputs=cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batch_size:(index + 1) * batch_size],
                    y: train_set_y[index * batch_size:(index + 1) * batch_size],
                })
    else:
        if lr_decay:
            train_model = theano.function(
                inputs=[index, epoch],
                outputs=cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batch_size:(index + 1) * batch_size],
                    y_mat: train_set_y[index * batch_size:(index + 1) * batch_size],
                })
        else:
            train_model = theano.function(
                inputs=[index],
                outputs=cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batch_size:(index + 1) * batch_size],
                    y_mat: train_set_y[index * batch_size:(index + 1) * batch_size],
                })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    result = train_nn(train_model, validate_model, test_model,
                      n_train_batches, n_valid_batches, n_test_batches,
                      n_epochs, verbose, lr_decay)
    # plot_graph(result[2])
    return result
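
# A minimal usage sketch (not part of the original file): it assumes test_mnist and its
# helpers (load_data_mnist, myMLP, train_nn) are importable from this module. Passing any
# value other than 'norm' for `loss` selects the hinge-loss branch above.
if __name__ == '__main__':
    result = test_mnist(learning_rate=0.01, n_epochs=100, batch_size=128,
                        n_hidden=500, n_hiddenLayers=3,
                        loss='hinge', lr_decay=True, binary=True, verbose=True)
    print(result)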
elif FLAGS.exp == 'celeba_waemmd':
    confs = config.conf_celeba_wae_mmd
    model = WAE.WAE_MMD(confs)
    optimizer = torch.optim.Adam(model.myparameters, lr=confs['lr'],
                                 betas=(confs['B1'], confs['B2']))
    scheduler1 = MultiStepLR(optimizer, milestones=confs['milestones1'], gamma=0.5)
    scheduler2 = MultiStepLR(optimizer, milestones=confs['milestones2'], gamma=0.2)

    if confs['dataset'] == 'MNIST':
        trainloader, testloader = load_data_mnist(test=True)
    elif confs['dataset'] == 'celeba':
        trainloader, testloader = load_data_celeba()

    if confs['CUDA']:
        model.cuda()

    #if confs['pretrain']:
    #    optimizer_p = torch.optim.Adam(model.encoder.parameters(), lr=confs['lr'],
    #                                   betas=(confs['B1_disc'], confs['B2_disc']))
    #    pretrain(model, trainloader, optimizer_p, confs)

    train_losses = []
    train_loss_data = {'total': [], 'recon': [], 'match': []}
    test_loss_data = {'total': [], 'recon': [], 'match': []}
    test_losses = []
        return nd.mean(nd.sum(L, 1))


if __name__ == "__main__":
    # setting the hyper parameters
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', default=16, type=int)
    parser.add_argument('--epochs', default=1, type=int)
    parser.add_argument('--train', default=False, type=bool)
    args = parser.parse_args()
    print(args)

    ctx = utils.try_gpu()
    # ctx = mx.cpu()
    train_data, test_data = utils.load_data_mnist(batch_size=args.batch_size, resize=28)

    net = CapsNet(batch_size=args.batch_size, ctx=ctx)
    margin_loss = CapsuleMarginLoss()
    print('====================================net====================================')
    print(net)

    if args.train:
        print('====================================train====================================')
        trainer = Trainer(net.collect_params(), 'adam', {'learning_rate': 0.01})
        utils.train(train_data, test_data, net, margin_loss, trainer, ctx,
                    num_epochs=args.epochs)
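
        # A possible follow-up step (a sketch, not in the original file): evaluate the trained
        # CapsNet on the held-out set with the same utils helper used by the other MNIST
        # scripts here; it assumes utils.evaluate_accuracy takes (data_iterator, net, ctx),
        # as in the examples above.
        test_acc = utils.evaluate_accuracy(test_data, net, ctx)
        print('test acc : ', test_acc)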
sys.path.append('./dependencies')
import utils

ctx = utils.try_gpu()

import VAE as vaemodule

vae = vaemodule.VAE()
filename1 = './params/vae.params.get'
#vae.load_params(filename1, ctx=ctx)
vae.collect_params()
print(vae)
vae.initialize(ctx=ctx)

batch_size = 128
total_epoch = 100
epoch_size = 500
train_data, test_data = utils.load_data_mnist(batch_size)
trainer = gluon.Trainer(vae.collect_params(), 'sgd', {'learning_rate': 0.01})

train_last_loss = 2.
train_curr_loss = 0.1

for epoch in range(total_epoch):
    if abs(train_last_loss - train_curr_loss) / train_last_loss < 1e-3:
        break
    train_loss = 0.
    tic = time.time()
    num = 0
    for data, label in train_data:
        num += 1
        # Control the number of training data in each epoch
        if num > epoch_size:
# Handwritten-digit recognition on MNIST with a Multilayer Perceptron (MLP),
# i.e. a multi-layer neural network
import mxnet as mx
from mxnet import gluon, autograd, ndarray
import numpy as np
import sys
sys.path.append('..')
import utils  # contains some self-defined utility functions, e.g. downloading and loading datasets

##########################################################
#### Prepare the input data ####
# We download the dataset automatically through gluon's data.vision module.
batch_size = 256  # number of images fed in per training step
train_data, test_data = utils.load_data_mnist(batch_size)
'''
def transform(data, label):
    return data.astype('float32')/255, label.astype('float32')

# download the data
#mnist_train = gluon.data.vision.FashionMNIST(train=True, transform=transform)
#mnist_test = gluon.data.vision.FashionMNIST(train=False, transform=transform)
mnist_train = gluon.data.vision.MNIST(train=True, transform=transform)
mnist_test = gluon.data.vision.MNIST(train=False, transform=transform)

# Load the training and test data with gluon.data.DataLoader.
# DataLoader is an iterator class, well suited to larger datasets.
train_data = gluon.data.DataLoader(mnist_train, batch_size=32, shuffle=True)
test_data = gluon.data.DataLoader(mnist_test, batch_size=32, shuffle=False)
'''
###########################################
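
# The excerpt stops after data preparation. A minimal sketch of the MLP that the header
# comment describes (layer sizes and hyperparameters here are illustrative assumptions,
# not from the original; utils.train's signature is taken from the other snippets above).
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Flatten())
    net.add(gluon.nn.Dense(256, activation='relu'))
    net.add(gluon.nn.Dense(10))
net.initialize()

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})
utils.train(train_data, test_data, net, softmax_cross_entropy, trainer,
            ctx=mx.cpu(), num_epochs=5)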