def predict_webcam_image(classifier, x, y, timegap=0.1):
    import cv2
    cap = cv2.VideoCapture(0)
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        # Our operations on the frame come here
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Resize the image to 48x48
        res = cv2.resize(gray, (48, 48), interpolation=cv2.INTER_CUBIC)
        # Display the resulting frames
        cv2.imshow('frame', gray)
        cv2.imshow('resized', res)
        # Close the frame when the 'q' key is pressed.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        # Normalize to [0, 1] before flattening (float divisor avoids integer
        # division on the uint8 frame).
        res = res / 255.0
        input_image = numpy.reshape(res, (1, 48 * 48))
        temp = numpy.asarray([1])
        input_image_x, input_image_y = shared_dataset([input_image, temp])
        test_input_example = theano.function(
            inputs=[],
            outputs=[classifier.errors(y), classifier.p_y_given_x],
            givens={
                x: input_image_x,
                y: input_image_y
            }
        )
        error, input_image_output_probabilities = test_input_example()
        print 'probabilities : ', input_image_output_probabilities
        print 'Emotion predicted :', emotionDictionary[numpy.argmax(input_image_output_probabilities)]
        print 'with probability :', numpy.max(input_image_output_probabilities) * 100
        print '-------------------------------------'
        time.sleep(timegap)
    # When everything is done, release the capture
    cap.release()
    cv2.destroyAllWindows()
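# Hedged usage sketch: `classifier`, `x`, and `y` are assumed to come from the
# training/loading code elsewhere in this file (e.g. a trained myMLP built on
# x = T.matrix('x') and y = T.ivector('y')); they are not defined here.
# predict_webcam_image(classifier, x, y, timegap=0.5)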
def load_model(filename):
    f = open(filename, 'rb')
    filecontent = f.read()
    loaded_obj = json.loads(filecontent)
    f.close()
    print 'loaded'

    parameters_shared = loaded_obj['p']
    learning_rate = loaded_obj['learning_rate']
    n_epochs = loaded_obj['n_epochs']
    nkerns = loaded_obj['nkerns']
    batch_size = 1
    verbose = loaded_obj['verbose']
    filterwidth_layer0 = loaded_obj['filterwidth_layer0']
    filterheight_layer0 = loaded_obj['filterheight_layer0']
    poolsize_layer0 = loaded_obj['poolsize_layer0']
    filterwidth_layer1 = loaded_obj['filterwidth_layer1']
    filterheight_layer1 = loaded_obj['filterheight_layer1']
    poolsize_layer1 = loaded_obj['poolsize_layer1']
    filterwidth_layer2 = loaded_obj['filterwidth_layer2']
    filterheight_layer2 = loaded_obj['filterheight_layer2']
    poolsize_layer2 = loaded_obj['poolsize_layer2']
    neurons_hidden = loaded_obj['neurons_hidden']
    smaller_set = loaded_obj['smaller_set']

    # Convert the JSON-serialized parameters back into numpy arrays.
    parameters = []
    for p in parameters_shared:
        p1 = []
        for row in p:
            p1.append(numpy.asarray(row))
        parameters.append(numpy.asarray(p1))

    rng = numpy.random.RandomState(23455)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 48 * 48)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 48, 48))

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 48, 48),
        filter_shape=(nkerns[0], 1, filterwidth_layer0, filterheight_layer0),
        W=parameters[-2],
        b=parameters[-1],
        poolsize=(poolsize_layer0, poolsize_layer0)
    )
    print '-------------------------------------------------------------------------------------------- \n'
    layer0_outputwidth, layer0_outputheight = (
        (48 - filterwidth_layer0 + 1) / poolsize_layer0,
        (48 - filterheight_layer0 + 1) / poolsize_layer0
    )
    print 'Layer0 built. Shape of feature map :', layer0_outputwidth, layer0_outputheight, 'Number of feature maps : ', nkerns[0]
    print '-------------------------------------------------------------------------------------------- \n'

    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], layer0_outputwidth, layer0_outputheight),
        filter_shape=(nkerns[1], nkerns[0], filterwidth_layer1, filterheight_layer1),
        W=parameters[-4],
        b=parameters[-3],
        poolsize=(poolsize_layer1, poolsize_layer1)
    )
    layer1_outputwidth, layer1_outputheight = (
        (layer0_outputwidth - filterwidth_layer1 + 1) / poolsize_layer1,
        (layer0_outputheight - filterheight_layer1 + 1) / poolsize_layer1
    )
    print 'Layer1 built. Shape of feature map :', layer1_outputwidth, layer1_outputheight, 'Number of feature maps : ', nkerns[1]
    print '-------------------------------------------------------------------------------------------- \n'

    poolsize_width_layer0_to_layer1 = layer0_outputwidth / layer1_outputwidth
    poolsize_height_layer0_to_layer1 = layer0_outputheight / layer1_outputheight
    print 'poolsize layer 0 o/p to layer 1 o/p width :', poolsize_width_layer0_to_layer1
    print 'poolsize layer 0 o/p to layer 1 o/p height :', poolsize_height_layer0_to_layer1

    # Downsample layer0's output so it can be concatenated with layer1's output.
    layer0_output_ds = downsample.max_pool_2d(
        input=layer0.output,
        ds=(poolsize_width_layer0_to_layer1, poolsize_height_layer0_to_layer1),  # TODO: change ds
        ignore_border=True
    )
    # concatenate layer
    print 'max pool layer created between output of layer0 and output of layer1. output of this max pool layer : ', layer0_outputwidth / poolsize_width_layer0_to_layer1, layer0_outputheight / poolsize_height_layer0_to_layer1
    print '-------------------------------------------------------------------------------------------- \n'
    layer2_input = T.concatenate([layer1.output, layer0_output_ds], axis=1)

    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer2_input,
        image_shape=(batch_size, nkerns[0] + nkerns[1], layer1_outputwidth, layer1_outputheight),
        filter_shape=(nkerns[2], nkerns[0] + nkerns[1], filterwidth_layer2, filterheight_layer2),
        W=parameters[-6],
        b=parameters[-5],
        poolsize=(poolsize_layer2, poolsize_layer2)
    )
    print 'Input to Layer2 (not equal to output of Layer1) : ', nkerns[0] + nkerns[1]
    layer2_outputwidth, layer2_outputheight = (
        (layer1_outputwidth - filterwidth_layer2 + 1) / poolsize_layer2,
        (layer1_outputheight - filterheight_layer2 + 1) / poolsize_layer2
    )
    print 'Layer2 built. Shape of feature map :', layer2_outputwidth, layer2_outputheight, 'Number of feature maps : ', nkerns[2]
    print '-------------------------------------------------------------------------------------------- \n'

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 1 * 1).
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=nkerns[2] * layer2_outputwidth * layer2_outputheight,
        n_out=neurons_hidden,
        W=parameters[-8],
        b=parameters[-7],
        activation=T.tanh
    )
    print 'MLP Layer created. Input neurons : ', nkerns[2] * layer2_outputwidth * layer2_outputheight, ' Output neurons :', neurons_hidden
    print '-------------------------------------------------------------------------------------------- \n'

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output, n_in=neurons_hidden, n_out=7,
                                W=parameters[-10], b=parameters[-9])
    print 'Logistic Layer created. Input neurons : ', neurons_hidden, ' output neurons :', 7
    print '-------------------------------------------------------------------------------------------- \n'

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)
    print 'Model Created...'

    ###################
    # MAKE PREDICTION #
    ###################
    import cv2
    cap = cv2.VideoCapture(0)
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        # Our operations on the frame come here
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Resize the image to 48x48
        res = cv2.resize(gray, (48, 48), interpolation=cv2.INTER_CUBIC)
        # Display the resulting frames
        cv2.imshow('frame', gray)
        cv2.imshow('resized', res)
        # Close the frame when the 'q' key is pressed.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        res = res / 255.0
        input_image = numpy.reshape(res, (1, 48 * 48))
        temp = numpy.asarray([1])
        input_image_x, input_image_y = shared_dataset([input_image, temp])
        test_input_example = theano.function(
            inputs=[],
            outputs=[layer4.errors(y), layer4.p_y_given_x],
            givens={
                x: input_image_x,
                y: input_image_y
            }
        )
        error, input_image_output_probabilities = test_input_example()
        print 'probabilities : ', input_image_output_probabilities
        print 'Emotion predicted :', emotionDictionary[numpy.argmax(input_image_output_probabilities)]
        print 'with probability :', numpy.max(input_image_output_probabilities) * 100
        print '-------------------------------------'
        time.sleep(0)
def test_mlp_with_new_functionality(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                                    n_epochs=100, batch_size=128, n_hidden=500,
                                    n_hiddenLayers=3, verbose=False, smaller_set=True,
                                    example_index=0, adversarial_parameter=0.01,
                                    distribution='constant'):
    """
    Wrapper function for training and testing MLP

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the number
    of units in each hidden layer, and its length should equal n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type smaller_set: boolean
    :param smaller_set: to use the smaller dataset or not to.
    """
    # load the dataset; download the dataset if it is not present
    train_set, valid_set, test_set = load_data(ds_rate=5, theano_shared=False)

    # single example used later for adversarial testing
    example_x = test_set[0][example_index:example_index + 1]
    example_y = test_set[1][example_index:example_index + 1]
    shared_example_x, shared_example_y = shared_dataset([example_x, example_y])

    # Convert raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    print 'n_train_batches : ', n_train_batches
    print 'n_valid_batches : ', n_valid_batches
    print 'n_test_batches : ', n_test_batches

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP: input, n_in, n_hidden, n_out, n_hiddenLayers
    classifier = myMLP(rng, input=x, n_in=3072, n_hidden=n_hidden, n_out=10,
                       n_hiddenLayers=n_hiddenLayers)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    test_example = theano.function(
        inputs=[index],
        outputs=[classifier.errors(y), classifier.p_y_given_x],
        givens={
            x: test_set_x[index * 1:(index + 1) * 1],
            y: test_set_y[index * 1:(index + 1) * 1]
        }
    )

    test_example2 = theano.function(
        inputs=[],
        outputs=[classifier.errors(y), classifier.p_y_given_x],
        givens={
            x: shared_example_x,
            y: shared_example_y
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # at the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)

    #########################################
    # Performing adversarial example testing #
    #########################################
    print '-------------------------------------'
    print 'example x :', example_x
    image = getImage(test_set[0][example_index])
    plt.figure(1)
    plt.imshow(image)
    print 'example y :', example_y
    print '-------------------------------------'

    classification, probabilities = test_example(example_index)
    if int(classification) == 0:
        print 'Correct classification performed :', True
    else:
        print 'Correct classification performed :', False
    print 'probabilities : ', probabilities
    print 'Number predicted :', numpy.argmax(probabilities)
    print 'with probability :', numpy.max(probabilities) * 100
    print '-------------------------------------'

    # gradient of the cost with respect to the input image
    gadversarial = [T.grad(cost, x)]
    grad_cost_wrt_x = theano.function(
        inputs=[index],
        outputs=gadversarial,
        givens={
            x: test_set_x[index * 1:(index + 1) * 1],
            y: test_set_y[index * 1:(index + 1) * 1]
        }
    )

    print 'Creating adversarial example and trying to get results for that ... \n \n \n'
    print '-------------------------------------'

    # fast gradient sign step: x_adv = x + epsilon * sign(dJ/dx)
    gradient_cost_wrt_x = grad_cost_wrt_x(example_index)
    gradient_sign = numpy.sign(gradient_cost_wrt_x)
    gradient_sign = numpy.reshape(gradient_sign, (1, 3072))
    adversarial_example_x = example_x + adversarial_parameter * gradient_sign

    input_image_x, input_image_y = shared_dataset([adversarial_example_x, example_y])
    test_input_example = theano.function(
        inputs=[],
        outputs=[classifier.errors(y), classifier.p_y_given_x],
        givens={
            x: input_image_x,
            y: input_image_y
        }
    )
    adversarial_classification, input_image_output_probabilities = test_input_example()
    if int(adversarial_classification) == 0:
        print 'Correct adversarial classification performed :', True
    else:
        print 'Correct adversarial classification performed :', False
    image2 = getImage(adversarial_example_x)
    plt.figure(2)
    plt.imshow(image2)
    print 'probabilities : ', input_image_output_probabilities
    print 'Number predicted :', numpy.argmax(input_image_output_probabilities)
    print 'with probability :', numpy.max(input_image_output_probabilities) * 100
    print '-------------------------------------'
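# Note on the adversarial step above: it is the fast gradient sign method,
# i.e. perturb the input in the direction that increases the loss,
#     x_adv = x + epsilon * sign(dJ(theta, x, y) / dx)
# with epsilon == adversarial_parameter. The three lines computing
# gradient_sign and adversarial_example_x are exactly this update, applied to
# one flattened 3072-dimensional CIFAR-10 image.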
    return 1.0 / sum(map(lambda p: 1 if p >= probs[y] else 0, probs))


def computeAcc(pred, y):
    return 1 if numpy.argmax(pred) == y else 0


model = pickle.load(open("../convolutional/results/100raw4.model", "r"))
(updates, cost, layer0, layer1, layer3, test_model, predictions, conditional_dist) = model
(trainCumLengths, validCumLengths, testCumLengths, filenames) = pickle.load(
    open("../convolutional/results/lengths.cache", 'r'))

fn = filenames[:1000]
fncl = trainCumLengths[:1000]
batch_size = 1

valid_batch_x, valid_batch_y = utils.shared_dataset(readGame.processGAMEs(filenames[:6], 'raw'))
test_batch_x, test_batch_y = utils.shared_dataset(readGame.processGAMEs(filenames[:6], 'raw'))

# get the game (assumed to be provided by the caller) and set batch size to 1
vx = utils.shared_dataset(game, representation='raw')
# vx, vy = my_net.getBatch(fn, i, fncl, batch_size, 'raw', batchType='fast', history=0)
valid_batch_x.set_value(vx)

# conds = numpy.array(conditional_dist())
# move = numpy.argmax(conds)
move = predictions()[0]
move = utils.move2fuego(move)
rets.append(move)

fw = open("py2c", "w")
fw.write(str(move))
data.drop_missing_values()

# center data AlexNet style
print 'center alexnet'
data.center_alexnet()

# generate train/validation split
train_set_x, valid_set_x, train_set_y, valid_set_y = train_test_split(
    data.X, data.y, test_size=0.2, random_state=42)

# change type and load to GPU
print 'load data to gpu'
train_set_x = train_set_x.reshape(-1, 1, 96, 96).astype(theano.config.floatX)
valid_set_x = valid_set_x.reshape(-1, 1, 96, 96).astype(theano.config.floatX)
train_set_y = train_set_y.astype(theano.config.floatX)
valid_set_y = valid_set_y.astype(theano.config.floatX)
train_set_x, train_set_y = shared_dataset(train_set_x, train_set_y)
valid_set_x, valid_set_y = shared_dataset(valid_set_x, valid_set_y)

X = T.ftensor4('X')
y = T.matrix('y')

net = build_model_vanila_CNN(X, stride=1)
network = net['prob']
train_fn, val_fn = build_update_functions(train_set_x, train_set_y,
                                          valid_set_x, valid_set_y,
                                          network, y, X)
print 'compile done successfully'

# train the network parameters
n_iter = 10000
improvement_threshold = 0.999
patience = 10000
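# The snippets in this file all lean on a shared_dataset helper; its exact
# signature varies from snippet to snippet. A minimal sketch of the two-argument
# variant used above (not the project's actual helper): load numpy arrays into
# Theano shared variables so minibatch slicing happens on the GPU.
import numpy as np
import theano


def shared_dataset_sketch(data_x, data_y, borrow=True):
    # Keep everything in floatX; callers that need integer labels can T.cast later.
    shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
    shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
    return shared_x, shared_y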
def test(self, dataset=None, presences=None, **kwargs):
    """
    Test the MLP on the given dataset with the presences.
    """
    save_costs_to_file = kwargs["save_exp_data"]
    batch_size = kwargs["batch_size"]
    save_patch_examples = False
    if kwargs.has_key("save_classified_patches"):
        save_patch_examples = kwargs["save_classified_patches"]

    if dataset is None or presences is None:
        raise Exception("Dataset or presences for pretraining can't be None.")

    self.state = "test"
    test_set_patches = shared_dataset(dataset, name="test_set_x")
    presences = numpy.asarray(presences.tolist(), dtype="int32")
    test_set_pre = shared_dataset(presences, name="test_set_pre")
    test_set_pre = T.cast(test_set_pre, 'int32')

    # compute number of minibatches for testing
    n_test_batches = int(math.ceil(dataset.shape[0] / batch_size))

    if self.output == 1 or self.output == 2:
        pre_minitest_probs = numpy.zeros((dataset.shape[0], self.n_out))
    else:
        pre_minitest_probs = numpy.zeros(
            (dataset.shape[0], self.n_out * self.no_of_patches))

    ######################
    # Testing the MODEL. #
    ######################
    print '... pre-testing the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    y = T.ivector('y')   # the labels are presented as a 1D vector of presences
    pindex = T.lscalar('pindex')

    p_y_given_x = self.class_memberships

    if save_patch_examples:
        test_model = theano.function(
            inputs=[index, pindex],
            outputs=[self.errors(y), p_y_given_x, self.raw_prediction_errors(y)],
            givens={
                self.input: test_set_patches[index * batch_size:(index + 1) * batch_size, pindex],
                y: test_set_pre[index * batch_size:(index + 1) * batch_size, pindex]
            })
    else:
        test_model = theano.function(
            inputs=[index, pindex],
            outputs=[self.errors(y), p_y_given_x],
            givens={
                self.input: test_set_patches[index * batch_size:(index + 1) * batch_size, pindex],
                y: test_set_pre[index * batch_size:(index + 1) * batch_size, pindex]
            })

    test_losses = []
    test_score = 0
    for minibatch_index in xrange(n_test_batches):
        for pidx in xrange(self.no_of_patches):
            if save_patch_examples:
                test_loss, membership_probs, raw_errors = test_model(minibatch_index, pidx)
                patches = dataset[minibatch_index * batch_size:(minibatch_index + 1) * batch_size, pidx]
                self.record_classified_examples(patches, raw_errors)
            else:
                test_loss, membership_probs = test_model(minibatch_index, pidx)
            test_losses.append(test_loss)
            test_score = numpy.mean(test_loss)
            pre_batch_vals = presences[minibatch_index * batch_size:(minibatch_index + 1) * batch_size, pidx]
            if self.output == 1:
                pre_minitest_probs[minibatch_index * batch_size:(minibatch_index + 1) * batch_size] += membership_probs
            if self.output == 2:
                pre_minitest_probs[minibatch_index * batch_size:(minibatch_index + 1) * batch_size] *= 10 * membership_probs
            else:
                pre_minitest_probs[minibatch_index * batch_size:(minibatch_index + 1) * batch_size,
                                   pidx * self.n_out:(pidx + 1) * self.n_out] = membership_probs
            self.logRegressionLayer.update_conf_mat(pre_batch_vals, membership_probs)
            if not self.quiet:
                print("Minibatch %i and its test error %f percent on patch %i"
                      % (minibatch_index, test_score * 100, pidx))

    if self.output == 2:
        pre_minitest_probs = numpy.sqrt(pre_minitest_probs)

    self.save_classified_patches()
    print "Confusion matrix:"
    print self.logRegressionLayer.conf_mat
    self.report_object_patch_statistics()

    fin_test_score = numpy.mean(test_losses)
    print("In the end final test score on whole image is %f\n" % (fin_test_score * 100))
    self.data_dict['test_scores'].append(test_losses)
    self.data_dict['test_probs'].append(pre_minitest_probs)
    return fin_test_score, pre_minitest_probs
def train(self, data=None, presences=None, **kwargs):
    """
    Pretrain the MLP on the patches of images.
    """
    learning_rate = kwargs["learning_rate"]
    L1_reg = kwargs["L1_reg"]
    L2_reg = kwargs["L2_reg"]
    n_epochs = kwargs["nepochs"]
    cost_type = kwargs["cost_type"]
    save_exp_data = kwargs["save_exp_data"]
    batch_size = kwargs["batch_size"]
    normalize_weights = kwargs["normalize_weights"]

    presences = numpy.asarray(presences.tolist(), dtype="uint8")
    self.learning_rate = learning_rate

    # Assign the state of MLP:
    self.state = "train"

    if data is None or presences is None:
        raise Exception("Dataset or presences for pretraining can't be None.")
    if data.shape[0] != presences.shape[0]:
        raise Exception("Dataset and presences shape mismatch.")

    train_set_patches = shared_dataset(data, name="train_set_x")
    train_set_pre = shared_dataset(presences, name="train_set_pre")
    train_set_pre = T.cast(train_set_pre, "int32")

    # compute number of minibatches for training
    n_train_batches = int(math.ceil(data.shape[0] / batch_size))

    if self.output == 1 or self.output == 2:
        pre_train_probs = numpy.zeros((data.shape[0], self.n_out))
    else:
        pre_train_probs = numpy.zeros((data.shape[0], self.n_out * self.no_of_patches))

    ######################
    # Pretrain the MODEL #
    ######################
    print '... pretraining the model'

    # allocate symbolic variables for the data
    index = T.lscalar('index')   # index to a [mini]batch
    y = T.ivector('y')           # the labels are presented as a 1D vector of presences
    pindex = T.lscalar('pindex')

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically.
    cost = self.get_cost_function(cost_type, y, L1_reg, L2_reg)
    p_y_given_x = self.class_memberships
    updates = self.sgd_updates(cost, learning_rate)

    # compiling a Theano function `train_model` that returns the cost, but
    # at the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index, pindex],
        outputs=[cost, p_y_given_x],
        updates=updates,
        givens={
            self.input: train_set_patches[index * batch_size:(index + 1) * batch_size, pindex],
            y: train_set_pre[index * batch_size:(index + 1) * batch_size, pindex]
        })

    epoch = 0
    costs = []
    Ws = []
    while epoch < n_epochs:
        epoch_costs = []
        if normalize_weights:
            if epoch != 0:
                self.normalize_weights()
        if not self.quiet:
            print "Training epoch %d has started." % (epoch)
        for minibatch_index in xrange(n_train_batches):
            minibatch_costs = []
            for pidx in xrange(self.no_of_patches):
                minibatch_avg_cost, membership_probs = train_model(minibatch_index, pidx)
                minibatch_costs.append(float(minibatch_avg_cost.tolist()))
                if self.output == 1:
                    pre_train_probs[minibatch_index * batch_size:(minibatch_index + 1) * batch_size] += membership_probs
                if self.output == 2:
                    pre_train_probs[minibatch_index * batch_size:(minibatch_index + 1) * batch_size] *= 10 * membership_probs
                else:
                    pre_train_probs[minibatch_index * batch_size:(minibatch_index + 1) * batch_size,
                                    pidx * self.n_out:(pidx + 1) * self.n_out] = membership_probs
            if self.output == 2:
                pre_train_probs = numpy.sqrt(pre_train_probs)
            Ws.append(self.params[2])
            epoch_costs.append(minibatch_costs)
        costs.append(epoch_costs)
        if not self.quiet:
            print "Normalizing the weights"
        epoch += 1

    self.data_dict['costs'].append([costs])
    self.data_dict['train_probs'].append(pre_train_probs)
    return costs, pre_train_probs
def mini_batch_sgd_with_annealing(motif, train_data, labels, xTrain_data, xTrain_targets,
                                  learning_rate, L1_reg, L2_reg, epochs,
                                  batch_size, hidden_dim, model_type,
                                  model_file=None, trained_model_dir=None,
                                  verbose=True, extra_args=None):
    # Preamble #
    # determine dimensionality of data and number of classes
    n_train_samples, data_dim = train_data.shape
    n_classes = len(set(labels))

    # compute number of mini-batches for training and cross-training
    train_set_x, train_set_y = shared_dataset(train_data, labels, True)
    xtrain_set_x, xtrain_set_y = shared_dataset(xTrain_data, xTrain_targets, True)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_xtrain_batches = xtrain_set_x.get_value(borrow=True).shape[0] / batch_size

    batch_index = T.lscalar()
    # containers to hold mini-batches
    x = T.matrix('x')
    y = T.ivector('y')

    net = get_network(x=x, in_dim=data_dim, n_classes=n_classes,
                      hidden_dim=hidden_dim, model_type=model_type,
                      extra_args=extra_args)
    if net is False:
        return False

    # cost function
    cost = (net.negative_log_likelihood(labels=y)
            + L1_reg * net.L1
            + (L2_reg / n_train_samples) * net.L2_sq)

    xtrain_fcn = theano.function(inputs=[batch_index],
                                 outputs=net.errors(y),
                                 givens={
                                     x: xtrain_set_x[batch_index * batch_size:(batch_index + 1) * batch_size],
                                     y: xtrain_set_y[batch_index * batch_size:(batch_index + 1) * batch_size]
                                 })

    # gradients
    nambla_params = [T.grad(cost, param) for param in net.params]

    # update tuple
    dynamic_learning_rate = T.as_tensor_variable(learning_rate)
    updates = [(param, param - dynamic_learning_rate * nambla_param)
               for param, nambla_param in zip(net.params, nambla_params)]

    # main training function
    train_fcn = theano.function(inputs=[batch_index],
                                outputs=cost,
                                updates=updates,
                                givens={
                                    x: train_set_x[batch_index * batch_size:(batch_index + 1) * batch_size],
                                    y: train_set_y[batch_index * batch_size:(batch_index + 1) * batch_size]
                                })

    train_error_fcn = theano.function(inputs=[batch_index],
                                      outputs=net.errors(y),
                                      givens={
                                          x: train_set_x[batch_index * batch_size:(batch_index + 1) * batch_size],
                                          y: train_set_y[batch_index * batch_size:(batch_index + 1) * batch_size]
                                      })

    if model_file is not None:
        net.load_from_file(file_path=model_file, careful=True)

    # do the actual training
    batch_costs = [np.inf]
    add_to_batch_costs = batch_costs.append
    xtrain_accuracies = []
    add_to_xtrain_acc = xtrain_accuracies.append
    train_accuracies = []
    add_to_train_acc = train_accuracies.append
    xtrain_costs_bin = []
    prev_xtrain_cost = 1e-10
    best_xtrain_accuracy = -np.inf
    best_model = ''
    check_frequency = int(epochs / 10)

    for epoch in xrange(0, epochs):
        # evaluation of training progress and summary stat collection
        if epoch % check_frequency == 0:
            # get the accuracy on the cross-train data
            xtrain_errors = [xtrain_fcn(_) for _ in xrange(n_xtrain_batches)]
            avg_xtrain_errors = np.mean(xtrain_errors)
            avg_xtrain_accuracy = 100 * (1 - avg_xtrain_errors)
            # then the training set
            train_errors = [train_error_fcn(_) for _ in xrange(n_train_batches)]
            avg_training_errors = np.mean(train_errors)
            avg_train_accuracy = 100 * (1 - avg_training_errors)
            # collect for tracking progress
            add_to_xtrain_acc(avg_xtrain_accuracy)
            add_to_train_acc(avg_train_accuracy)
            xtrain_costs_bin += xtrain_errors
            if verbose:
                print("{0}: epoch {1}, batch cost {2}, train accuracy {3}, cross-train accuracy {4}"
                      .format(motif, epoch, batch_costs[-1], avg_train_accuracy, avg_xtrain_accuracy),
                      file=sys.stderr)
            # if we're getting better, save the model; the 'oldest' model should be
            # the one with the highest cross-train accuracy
            if avg_xtrain_accuracy >= best_xtrain_accuracy and trained_model_dir is not None:
                if not os.path.exists(trained_model_dir):
                    os.makedirs(trained_model_dir)
                # update the best accuracy and best model
                best_xtrain_accuracy = avg_xtrain_accuracy
                best_model = "{0}model{1}.pkl".format(trained_model_dir, epoch)
                net.write(best_model)

        for i in xrange(n_train_batches):
            batch_avg_cost = train_fcn(i)
            if i % (n_train_batches / 10) == 0:
                add_to_batch_costs(float(batch_avg_cost))

        # annealing protocol
        mean_xtrain_cost = np.mean([xtrain_fcn(_) for _ in xrange(n_xtrain_batches)])
        if mean_xtrain_cost / prev_xtrain_cost < 1.0:
            dynamic_learning_rate *= 0.9
        if mean_xtrain_cost > prev_xtrain_cost:
            dynamic_learning_rate *= 1.05
        prev_xtrain_cost = mean_xtrain_cost

    # pickle the summary stats for the training
    summary = {
        "batch_costs": batch_costs,
        "xtrain_accuracies": xtrain_accuracies,
        "train_accuracies": train_accuracies,
        "xtrain_errors": xtrain_costs_bin,
        "best_model": best_model
    }
    if trained_model_dir is not None:
        with open("{}summary_stats.pkl".format(trained_model_dir), 'w') as f:
            cPickle.dump(summary, f)
    return net, summary
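# Hedged usage sketch for the trainer above. The motif label, array names, and
# model_type value are illustrative placeholders for whatever get_network
# accepts in this codebase; the arrays are (N, D) features with integer labels.
# net, summary = mini_batch_sgd_with_annealing(
#     motif="CCWGG", train_data=train_X, labels=train_y,
#     xTrain_data=xtrain_X, xTrain_targets=xtrain_y,
#     learning_rate=0.01, L1_reg=0.0, L2_reg=0.001, epochs=100, batch_size=32,
#     hidden_dim=100, model_type="twoLayer", trained_model_dir="./models/")
# print(summary["best_model"])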
def test_mlp_parity(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
                    batch_size=64, n_hidden=500, n_hiddenLayers=1, verbose=False):
    reader = csv.reader(open("joint_knee.csv", "rb"), delimiter=',')
    x = list(reader)
    result = numpy.array(x)

    def score_to_numeric(x, a):
        # Map categorical hospital-record fields to integer codes.
        if x == 'Hospice - Home': return 11
        if x == 'Psychiatric Hospital or Unit of Hosp': return 10
        if x == 'Hospice - Medical Facility': return 9
        if x == 'Expired': return 8
        if x == 'Facility w/ Custodial/Supportive Care': return 7
        if x.lower() == 'left against medical advice': return 6
        if x.lower() == 'short-term hospital': return 5
        if x.lower() == 'multi-racial' or x.lower() == 'home or self care': return 4
        if x.lower() in ('other race', 'emergency', 'skilled nursing home', 'not available'): return 3
        if x.lower() in ('m', 'black/african american', 'urgent', 'inpatient rehabilitation facility'): return 2
        if x.lower() in ('f', 'white', 'elective', 'home w/ home health services'): return 1
        if a == 1:
            return int(x[:2])
        if a == 2:
            return float(x[1:])
        else:
            return float(x)

    rownum = 0
    for row in result:
        # Save the header row and locate the columns of interest.
        if rownum == 0:
            rownum += 1
            header = row
            for i in range(0, len(header)):
                if header[i].lower() == 'gender': gender = i
                if header[i].lower() == 'race': race = i
                if header[i].lower() == 'type of admission': admi = i
                if header[i].lower() == 'patient disposition': disp = i
                if header[i].lower() == 'age group': age = i
                if header[i].lower() == 'total charges': price = i
        else:
            row[gender] = score_to_numeric(row[gender], 0)
            row[race] = score_to_numeric(row[race], 0)
            row[admi] = score_to_numeric(row[admi], 0)
            row[disp] = score_to_numeric(row[disp], 0)
            row[age] = score_to_numeric(row[age], 1)
            row[price] = score_to_numeric(row[price], 2)
            for i in range(0, len(row)):
                row[i] = float(row[i])

    res = result[1:len(result), 1:].astype(numpy.float)
    for i in range(len(res)):
        for j in range(len(res[0])):
            if j == 9:
                res[i, j] = int(round(res[i, j] / 10000))
            else:
                res[i, j] = int(round(res[i, j]))

    myset = set(res[:, 9])
    nout = len(myset)
    y = res[:, 9]
    x = res[:, 0:9]

    # feature selection with an extra-trees classifier
    clf = ExtraTreesClassifier()
    clf = clf.fit(x, y)
    model = SelectFromModel(clf, prefit=True)
    X_new = model.transform(x)
    data = np.c_[X_new, y]

    totallen = len(data)
    numpy.random.shuffle(data)
    training, validation, testing = (data[:totallen / 2, :],
                                     data[totallen / 2:(3 * totallen / 4), :],
                                     data[(3 * totallen / 4):, :])
    l = len(data[0]) - 1
    train_set = [training[:, 0:l], training[:, l]]
    valid_set = [validation[:, 0:l], validation[:, l]]
    test_set = [testing[:, 0:l], testing[:, l]]

    # Convert raw dataset to Theano shared variables.
    train_set_x, train_set_y = shared_dataset(train_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    test_set_x, test_set_y = shared_dataset(test_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = myMLP(rng=rng, input=x, n_in=l, n_hidden=n_hidden,
                       n_out=len(myset), n_hiddenLayers=n_hiddenLayers)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost, but
    # at the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)

    y_p_train = theano.function(inputs=[],
                                outputs=[classifier.logRegressionLayer.y_pred],
                                givens={x: train_set_x})
    y_predict = theano.function(inputs=[],
                                outputs=[classifier.logRegressionLayer.y_pred],
                                givens={x: test_set_x})
    y_pred1 = y_p_train()
    y_pred2 = y_predict()
    return y_pred1, y_pred2
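# Hedged usage sketch for the function above (assumes joint_knee.csv sits next
# to the script and that myMLP/train_nn are defined as elsewhere in this file;
# the hyperparameter values are illustrative).
# y_pred_train, y_pred_test = test_mlp_parity(learning_rate=0.01, n_epochs=50,
#                                             batch_size=64, n_hidden=100,
#                                             n_hiddenLayers=1, verbose=True)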
batch_size = 5000
learning_rate = 3e-7
weight_decay = 2

# prepare the data
print ".preparing data"
dataset_paths = [
    utils.complement_path('/share/blur_images/all_in_one/train_o_set.npy'),
    utils.complement_path('/share/blur_images/all_in_one/train_b_set.npy'),
    utils.complement_path('/share/blur_images/all_in_one/valid_o_set.npy'),
    utils.complement_path('/share/blur_images/all_in_one/valid_b_set.npy'),
    utils.complement_path('/share/blur_images/all_in_one/test_o_set.npy'),
    utils.complement_path('/share/blur_images/all_in_one/test_b_set.npy')]
datasets = [utils.shared_dataset(np.load(dataset_path))
            for dataset_path in dataset_paths]

# build the network
print ".building network"
normal = T.fmatrix('normal')
corrupt = T.fmatrix('corrupt')
index = T.lscalar('index')
corrupt_input = corrupt.reshape((batch_size, 1, patch_shape[0], patch_shape[1]))
normal_input = normal.reshape((batch_size, 1, patch_shape[0], patch_shape[1]))

# patch extraction and representation, output shape = (33-9+1, 33-9+1) = (25, 25)
layer0_conv = ConvLayer(
    input=corrupt_input,
    np.save('/home/ubuntu/temp_data/y_valid_' + str(j), y_valid)
    np.save('/home/ubuntu/temp_data/x_test_' + str(j), normalized_data[test_index])
    np.save('/home/ubuntu/temp_data/y_test_' + str(j), labels[test_index])
    j = j + 1

del x_train, x_train_sm, x_valid, y_train, y_train_sm, y_valid, train_valid_data, train_valid_labels
del normalized_data

for j in range(k):
    print('--- iteration no. %d ---' % (j + 1))
    x_train_sm, y_train_sm = shared_dataset(
        np.load('/home/ubuntu/temp_data/x_train_sm_' + str(j) + '.npy'),
        np.load('/home/ubuntu/temp_data/y_train_sm_' + str(j) + '.npy'))

    ######################
    # BUILDING THE MODEL #
    ######################
    print('building SDA...')
    numpy_rng = np.random.RandomState(np.random.randint(0, 10000))
    sda = SdA(numpy_rng=numpy_rng, n_ins=visible_units,
              hidden_layers_sizes=hidden_layers_sizes, n_outs=2)

    n_train_batches = x_train_sm.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
def train(self, data=None, labels=None, **kwargs):
    learning_rate = kwargs["learning_rate"]
    L1_reg = kwargs["L1_reg"]
    L2_reg = kwargs["L2_reg"]
    n_epochs = kwargs["nepochs"]
    cost_type = kwargs["cost_type"]
    save_exp_data = kwargs["save_exp_data"]
    batch_size = kwargs["batch_size"]
    normalize_weights = kwargs["normalize_weights"]
    enable_dropout = kwargs["enable_dropout"]

    if data is None:
        raise Exception("Post-training can't start without pretraining class membership probabilities.")
    if labels is None:
        raise Exception("Post-training can't start without post-training class labels.")

    self.state = "train"
    self.learning_rate = learning_rate

    train_set_x = shared_dataset(data, name="training_set_x")
    train_set_y = shared_dataset(labels, name="labels")
    train_set_y = T.cast(train_set_y, "int32")

    # compute number of minibatches for training
    n_examples = data.shape[0]
    n_train_batches = int(math.ceil(n_examples / batch_size))

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... post-training the model'

    # allocate symbolic variables for the data
    index = T.lscalar('index')  # index to a [mini]batch
    y = T.ivector('y')          # the labels are presented as a 1D vector of int32
    mode = "FAST_RUN"

    if DEBUGGING:
        index.tag.test_value = 0
        y.tag.test_value = numpy.ones(n_examples)
        mode = "DEBUG_MODE"

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically.
    cost = self.get_cost_function(cost_type, y, L1_reg, L2_reg)
    updates = self.sgd_updates(cost, learning_rate)

    # compiling a Theano function `train_model` that returns the cost, but
    # at the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            self.input: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        mode=mode)

    if DEBUGGING:
        theano.printing.debugprint(train_model)

    epoch = 0
    costs = []
    Ws = []
    while epoch < n_epochs:
        print "Training epoch %d" % (epoch)
        for minibatch_index in xrange(n_train_batches):
            print "Post-training in minibatch %i" % (minibatch_index)
            minibatch_avg_cost = train_model(minibatch_index)
            if enable_dropout:
                self.dropout()
            if normalize_weights:
                self.normalize_weights()
            costs.append(float(minibatch_avg_cost))
            Ws.append(self.params[2])
        epoch += 1

    if save_exp_data:
        self.data_dict['Ws'].append(Ws)
        self.data_dict['costs'].append([costs])
        self.save_data()
    return costs
def test(self, data=None, labels=None, **kwargs):
    save_exp_data = kwargs["save_exp_data"]
    batch_size = kwargs["batch_size"]

    if data is None:
        raise Exception("Post-testing can't start without pretraining class membership probabilities.")
    if labels is None:
        raise Exception("Post-testing can't start without post-training class-membership probabilities.")

    test_set_x = shared_dataset(data)
    test_set_y = shared_dataset(labels)
    test_set_y = T.cast(test_set_y, "int32")

    self.state = "test"

    # compute number of minibatches for testing
    n_examples = data.shape[0]
    n_test_batches = int(math.ceil(n_examples / batch_size))

    print '... post-testing the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels
    mode = "FAST_RUN"

    if DEBUGGING:
        theano.config.compute_test_value = 'raise'
        index.tag.test_value = 0
        y.tag.test_value = numpy.ones(n_examples)
        mode = "DEBUG_MODE"

    # compiling a Theano function `test_model` that returns the classification
    # errors made on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=self.errors(y),
        givens={
            self.input: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        },
        mode=mode)

    ###############
    # TEST MODEL  #
    ###############
    test_losses = []
    for minibatch_index in xrange(n_test_batches):
        test_losses.append(float(test_model(minibatch_index)))
        test_score = numpy.mean(test_losses)
        print("Minibatch %i, mean test error %f" % (minibatch_index, test_score * 100))

    if save_exp_data:
        self.data_dict['test_scores'].append(test_losses)
        self.save_data()
    return test_score, test_losses
# build the mask matrix for missing values and load it into a theano shared variable;
# masks hold 0 where the target value is nan
temp = np.isnan(train_set_y)
train_MASK = np.ones(temp.shape)
train_MASK[temp] = 0
# still have to replace nan with something to avoid propagation in theano
train_set_y[temp] = -1000

temp = np.isnan(valid_set_y)
val_MASK = np.ones(temp.shape)
val_MASK[temp] = 0
# still have to replace nan with something to avoid propagation in theano
valid_set_y[temp] = -1000

# load into theano shared variables
print 'load data to gpu \n'
train_set_x, train_set_y = shared_dataset(train_set_x, train_set_y)
valid_set_x, valid_set_y = shared_dataset(valid_set_x, valid_set_y)
val_MASK, train_MASK = shared_dataset(val_MASK, train_MASK)

X = T.ftensor4('X')
y = T.matrix('y')
batch_size = 32
l2 = .0002
learn_rate = 1e-3

#####################################################
# Continue a previous run
# with open("results_backup.p", "rb") as f:
#     best_network_params, best_val_loss_, best_epoch_, train_loss_history_, val_loss_history_, network = pickle.load(f)
# # extract input var
def test_mlp_parity(n_bit):
    # f = open('./problem_b/shallow_mlp_8bit.txt', 'w')
    # f = open('./problem_b/shallow_mlp_12bit.txt', 'w')
    f = open('./problem_b/deep_mlp_8bit.txt', 'w')
    # f = open('./problem_b/deep_mlp_12bit.txt', 'w')

    batch_size = 24
    # n_hidden = 24
    n_hidden = (24, 24, 24, 24)
    learning_rate = 0.08
    L1_reg = 0.0
    L2_reg = 0.0
    n_epochs = 300
    n_hiddenLayers = 4

    # generate datasets
    train_set = gen_parity_pair(n_bit, 2000)
    valid_set = gen_parity_pair(n_bit, 500)
    test_set = gen_parity_pair(n_bit, 100)

    # Convert raw dataset to Theano shared variables.
    train_set_x, train_set_y = shared_dataset(train_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    test_set_x, test_set_y = shared_dataset(test_set)

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels
    # training_enabled = T.iscalar('training_enabled')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    rng = np.random.RandomState(23455)
    layers_input = x.reshape((batch_size, n_bit))
    layers = myMLP(rng, input=layers_input, n_in=n_bit, n_hidden=n_hidden,
                   n_out=2, n_hiddenLayers=n_hiddenLayers)

    test_model = theano.function(
        [index],
        layers.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layers.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    cost = (layers.negative_log_likelihood(y)
            + layers.L1 * L1_reg
            + layers.L2_sqr * L2_reg)

    params = layers.params
    grads = [T.grad(cost, param) for param in params]
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model=train_model, validate_model=validate_model,
             test_model=test_model, n_train_batches=n_train_batches,
             n_valid_batches=n_valid_batches, n_test_batches=n_test_batches,
             n_epochs=n_epochs, fil=f)
    f.close()
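# Hedged usage sketch: train the 4-hidden-layer parity MLP on 8-bit inputs
# (8 matches the hard-coded 'deep_mlp_8bit.txt' log file above; gen_parity_pair,
# myMLP, and train_nn must be available as in the rest of this code).
# test_mlp_parity(8)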
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
             batch_size=128, n_hidden=500, n_hiddenLayers=3, verbose=False,
             smaller_set=True, timegap=0.5):
    """
    Wrapper function for training and testing MLP

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the number
    of units in each hidden layer, and its length should equal n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type smaller_set: boolean
    :param smaller_set: to use the smaller dataset or not to.
    """
    train_set, valid_set, test_set = load_data(theano_shared=False)

    # Convert raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    print test_set_y.eval().shape

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    print 'n_train_batches : ', n_train_batches
    print 'n_valid_batches : ', n_valid_batches
    print 'n_test_batches : ', n_test_batches

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP: input, n_in, n_hidden, n_out, n_hiddenLayers
    classifier = myMLP(rng, input=x, n_in=2304, n_hidden=n_hidden, n_out=7,
                       n_hiddenLayers=n_hiddenLayers, parameters=None)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # at the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
    print('MODEL TRAINED..')

    ##################
    # SAVE the MODEL #
    ##################
    import os
    modelFolderName = 'mlp_models'
    cmd = 'mkdir %s' % modelFolderName
    os.system(cmd)
    save_model(classifier, n_hidden, n_hiddenLayers,
               modelFolderName + '/' + 'mlp_classifier_nhidden_%s_hiddenlayers_%s_batchSize_%s_epochs_%s'
               % (n_hidden, n_hiddenLayers, batch_size, n_epochs))
    print 'Model Saved. '


# modelFolderName = 'mlp_models'
# modelName = 'mlp_classifier_nhidden_500_hiddenlayers_3_batchSize_20_epochs_2_json.save'
# test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=2, batch_size=20,
#          n_hidden=500, n_hiddenLayers=3, verbose=True, smaller_set=False)
# predict_from_trained_model(modelFolderName + '/' + modelName)
# if not os.path.exists(save_path):
#     os.makedirs(save_path); print 'create dir', save_path
# save_the_env(dir_to_save='../cifar10', path=save_path)

if nndF:
    X = T.matrix('X')
    logistic_reg = unpickle(nnd_path + '/best_model.pkl')
    get_lr_pred = theano.function([X], logistic_reg.forward(X))

import cPickle, gzip
f = gzip.open(datapath, 'rb')
train_set_np, valid_set_np, test_set_np = cPickle.load(f)
f.close()

N, D = train_set_np[0].shape
Nv, D = valid_set_np[0].shape
Nt, D = test_set_np[0].shape

train_set = shared_dataset(train_set_np)
valid_set = shared_dataset(valid_set_np)
test_set = shared_dataset(test_set_np)

print 'batch sz %d, epsilon gen %g, epsilon dis %g, num_z %d, num_conv_hid %g, num_epoch %d, lam %g' % \
    (batch_sz, epsilon_gen, epsilon_dis, num_z, conv_num_hid, num_epoch, lam)

book_keeping = []
num_hids = [num_hid1]
train_params = [num_epoch, epoch_start, contF]
opt_params = [batch_sz, epsilon_gen, epsilon_dis, momentum, num_epoch, N, Nv, Nt, lam]
ganI_params = [batch_sz, D, num_hids, rng, num_z, nkerns, ckern, num_channel]
conv_params = [conv_num_hid, D, num_class, batch_sz, num_channel]
min_vl_cost = main(train_set, valid_set, test_set, opt_params, ganI_params,
                   train_params, conv_params)
book_keeping.append(min_vl_cost)
def run(rng_seed,ltype, mtype,load_path, load_epoch, sample=False, nclass=10, whichclass=None, verbose=False, class_list=None, ckernr=None, cri_ckern=None): assert ckernr!=None # ltype -> GAN LSGAN WGAN # JS 0.4+-asdf # LS # WA # MMD # IS ### MODEL PARAMS ### MODEL PARAMS # ltype = sys.argv[3] # mtype = 'js' # print 'ltype: ' + ltype # print 'mtype: ' + mtype mmdF = False nndF = False # CONV (DISC) conv_num_hid= 100 num_channel = 3 #Fixed num_class = 1 #Fixed D=64*64*3 kern=int(ckernr.split('_')[0]) ### OPT PARAMS batch_sz = 100 momentum = 0.0 #Not Used lam = 0.0 epsilon_dis = 0.0002 epsilon_gen = 0.0001 # if mtype =='js' : # epsilon_dis = 0.0002 # epsilon_gen = 0.0001 # K=5 #FIXED # J=1 # elif mtype == 'ls': # epsilon_dis = 0.0002 # epsilon_gen = 0.0001 # K=5 #FIXED # J=1 # else: # epsilon_dis = 0.0002 # epsilon_gen = 0.0001 # K=2 #FIXED # J=1 # ganI (GEN) filter_sz = 4 #FIXED nkerns = [1,8,4,2,1] ckern = int(ckernr.split('_')[-1]) #20 num_hid1 = nkerns[0]*ckern*filter_sz*filter_sz #Fixed num_z = 100 ### TRAIN PARAMS num_epoch = 10 epoch_start = 0 #Fixed contF = True #Fixed num_hids = [num_hid1] input_width = 64 input_height = 64 input_depth = 3 ### SAVE PARAM model_param_save = 'num_hid%d.batch%d.eps_dis%g.eps_gen%g.num_z%d.num_epoch%g.lam%g.ts%d.data.100_CONV_lsun'%(conv_num_hid,batch_sz, epsilon_dis, epsilon_gen, num_z, num_epoch, lam1, num_steps) # device=sys.argv[1] import os os.environ['RNG_SEED'] = str(rng_seed) os.environ['LOAD_PATH'] = load_path os.environ['LOAD_EPOCH'] = str(load_epoch) os.environ['LTYPE'] = ltype # os.environ['MTYPE'] = mtype try: a=os.environ['CRI_KERN'] except: if cri_ckern!=None: os.environ['CRI_KERN']=cri_ckern else: raise RuntimeError('cri_kern not provided') import theano import theano.sandbox.rng_mrg as RNG_MRG rng = np.random.RandomState(int(os.environ['RNG_SEED'])) MRG = RNG_MRG.MRG_RandomStreams(rng.randint(2 ** 30)) from util_cifar10 import load_cifar10 from utils import shared_dataset, unpickle import pwd; username = pwd.getpwuid(os.geteuid()).pw_name global nnd_path if username in ['hma02', 'mahe6562']: if username=='hma02': datapath = '/mnt/data/hma02/data/cifar10/cifar-10-batches-py/' save_path = '/mnt/data/hma02/gap/dcgan-cifar10/' nnd_path = '/mnt/data/hma02/gap/' else: datapath = '/scratch/g/gwtaylor/mahe6562/data/cifar10/cifar-10-batches-py/' save_path = '/scratch/g/gwtaylor/mahe6562/gap/dcgan-cifar10/' nnd_path = '//scratch/g/gwtaylor/mahe6562/gap/' import time; date = '%d-%d' % (time.gmtime()[1], time.gmtime()[2]) import os; worker_id = os.getpid() save_path+= date+'-%d-%s/' % (worker_id,ltype) # if not os.path.exists(save_path): # os.makedirs(save_path); print 'create dir',save_path # # save_the_env(dir_to_save='../mnist', path=save_path) global train_set_np,valid_set_np,test_set_np train_set_np, valid_set_np, test_set_np = load_cifar10(path=datapath, verbose=False) # 127.5 - 1. in order to rescale to -1 to 1. train_set_np[0] = train_set_np[0] / 255.0 #127.5 - 1. valid_set_np[0] = valid_set_np[0] / 255.0 #127.5 - 1. test_set_np[0] = test_set_np[0] / 255.0 #127.5 - 1. 
    N, D = train_set_np[0].shape
    Nv, D = valid_set_np[0].shape
    Nt, D = test_set_np[0].shape

    train_set = shared_dataset(train_set_np)
    valid_set = shared_dataset(valid_set_np)
    test_set = shared_dataset(test_set_np)

    # print 'batch sz %d, epsilon gen %g, epsilon dis %g, num_z %d, num_conv_hid %g, num_epoch %d, lam %g' % \
    #     (batch_sz, epsilon_gen, epsilon_dis, num_z, conv_num_hid, num_epoch, lam)

    book_keeping = []
    num_hids = [num_hid1]
    train_params = [num_epoch, epoch_start, contF]
    opt_params = [batch_sz, epsilon_gen, epsilon_dis, momentum, num_epoch, N, Nv, Nt, lam]
    ganI_params = [batch_sz, D, num_hids, rng, num_z, nkerns, ckern, num_channel]
    conv_params = [conv_num_hid, D, num_class, batch_sz, num_channel, kern]

    if sample:
        samples = main(train_set, valid_set, test_set, opt_params, ganI_params,
                       train_params, conv_params, sample)
        return 0, 0, 0, 0
    else:
        te_score_ls, te_score_iw, mmd_te, is_sam = main(train_set, valid_set, test_set,
                                                        opt_params, ganI_params,
                                                        train_params, conv_params, sample)
        return te_score_ls, te_score_iw, mmd_te, is_sam
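# --- Illustrative usage (not part of the original code) ----------------------
# A hedged sketch of how run() above might be invoked.  The load_path and the
# ckernr / cri_ckern strings are placeholders; ckernr is required and is parsed
# as '<disc kern>_<gen ckern>', and cri_ckern is exported as CRI_KERN when that
# environment variable is not already set.
te_score_ls, te_score_iw, mmd_te, is_sam = run(
    rng_seed=1234,
    ltype='GAN',                       # loss type, forwarded via the LTYPE env var
    mtype='js',
    load_path='/path/to/saved/model',  # placeholder path
    load_epoch=10,
    sample=False,
    ckernr='128_64',
    cri_ckern='64',
)
print 'LS %g  IW %g  MMD %g  IS %g' % (te_score_ls, te_score_iw, mmd_te, is_sam)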
def train(learning_rate=0.1, n_epochs=10, kernel_shapes = [7,5], nkerns=[15,15], batch_size=1000, batch_type = 'fast', mynet = 'best', representation='raw', momentum=0, history=4): # TODO: implement history of boards rng = numpy.random.RandomState(42) trainP = 0.998 validP = 0.001 testP = 0.001 print "... Reading cached values ..." (trainCumLengths,validCumLengths,testCumLengths,filenames) = pickle.load(open("results/lengths.cache",'r')) print "... Getting filenames ..." datasetKGS = "../../go-data" datasetPro = "../../pro-GoGod" # use both datasets, test and valid set are only Pro games # fn1 = readGame.getFilenames(datasetKGS,1,0,1)[0] # random.shuffle(fn1) # fn2 = readGame.getFilenames(datasetPro,1,0,1)[0] # NOTE: last 5% of professional games never used! # fn2 = fn2[:int(len(fn2)*0.95)] # random.shuffle(fn2) # filenames = fn2 #fn1 + fn2 n = len(filenames) print "... Learning set contains " + str(n) + " games" print "... Computing cumulative game lengths ..." trainNames = filenames[:int(trainP*n)] validNames = filenames[int(trainP*n):int(trainP*n+validP*n)] testNames = filenames[int(trainP*n+validP*n):int(trainP*n+validP*n+testP*n)] # random.shuffle(trainNames) # trainCumLengths = readGame.getCumGameLengths(trainNames) # validCumLengths = readGame.getCumGameLengths(validNames) # testCumLengths = readGame.getCumGameLengths(testNames) # fw = open("results/"lengths.cache","wb") # pickle.dump((trainCumLengths,validCumLengths,testCumLengths,filenames),fw) # fw.close() print "... Preprocessing initial batches ..." minn = batch_size / 80 +1 temp = time.time() test_batch_x, test_batch_y = utils.shared_dataset(readGame.processSGFs(testNames[:minn],representation),batch_size=batch_size) train_batch_x, train_batch_y = utils.shared_dataset(readGame.processSGFs(trainNames[:minn],representation),batch_size=batch_size) valid_batch_x, valid_batch_y = utils.shared_dataset(readGame.processSGFs(validNames[:minn],representation),batch_size=batch_size) print " average processing time per game: " + str((time.time()-temp)/18.0) + " seconds, per epoch: " + str(int((time.time()-temp)/18*n/60/60)) + " hours" # compute number of minibatches for training, validation and testing n_train_batches = trainCumLengths[-1] n_valid_batches = validCumLengths[-1] n_test_batches = testCumLengths[-1] n_train_batches /= batch_size n_valid_batches /= batch_size n_test_batches /= batch_size # allocate symbolic variables for the data iteration = T.lscalar() # iteration number of a minibatch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels gs = 19 # size of the go board ishape = (gs, gs) # this is the size of MNIST images fw = open("results/"+mynet+"_"+str(learning_rate)+"_"+str(nkerns[0])+".res","w") ###################### # BUILD ACTUAL MODEL # ###################### print '... Building the model ...' 
nc = 2 if representation=='raw' else 6 # if raw nc *= 1+history if mynet == "default": # default is 7x7, regular 3 kernels layer0_input = x.reshape((batch_size, nc, gs, gs)) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, nc, gs, gs), filter_shape=(nkerns[0], nc, 7, 7), poolsize=(1, 1)) layer2_input = layer0.output.flatten(2) layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[0] * 13 * 13, n_out=500, activation=T.tanh) layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=361) cost = layer3.negative_log_likelihood(y) # prevGrads = [theano.shared(numpy.zeros((500,361),dtype=theano.config.floatX),borrow=True), # theano.shared(numpy.zeros((361,),dtype=theano.config.floatX),borrow=True), # theano.shared(numpy.zeros((nkerns[0] *13*13,500), dtype=theano.config.floatX),borrow=True), # theano.shared(numpy.zeros((500,),dtype=theano.config.floatX),borrow=True), # theano.shared(numpy.zeros((nkerns[0],nc,7,7),dtype=theano.config.floatX),borrow=True), # theano.shared(numpy.zeros((nkerns[0],),dtype=theano.config.floatX),borrow=True), # ] params = layer3.params + layer2.params + layer0.params if mynet == "best": ks = kernel_shapes sp1= gs-ks[0]+1 sp2= sp1-ks[1]+1 layer0_input = x.reshape((batch_size, nc, gs, gs)) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, nc, gs, gs), filter_shape=(nkerns[0], nc, ks[0], ks[0]), poolsize=(1, 1)) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], sp1, sp1), filter_shape=(nkerns[1], nkerns[0], ks[1], ks[1]), poolsize=(1, 1)) layer3 = LogisticRegression(input=layer1.output.flatten(2), n_in=nkerns[1]*sp2*sp2, n_out=gs*gs) cost = layer3.negative_log_likelihood(y) prevGrads = [theano.shared(numpy.zeros((nkerns[1]*9*9,361),dtype=theano.config.floatX),borrow=True), theano.shared(numpy.zeros((gs*gs,),dtype=theano.config.floatX),borrow=True), theano.shared(numpy.zeros((nkerns[0],nkerns[1],ks[1],ks[1]), dtype=theano.config.floatX),borrow=True), theano.shared(numpy.zeros((nkerns[1],),dtype=theano.config.floatX),borrow=True), theano.shared(numpy.zeros((nkerns[0],nc,ks[0],ks[0]),dtype=theano.config.floatX),borrow=True), theano.shared(numpy.zeros((nkerns[0],),dtype=theano.config.floatX),borrow=True), ] params = layer3.params + layer1.params + layer0.params if mynet == "padded": # TODO: add zero padding test deeper architectures ks = kernel_shapes sp1= gs-ks[0]+1 sp2= sp1-ks[1]+1 layer0_input = x.reshape((batch_size, nc, gs, gs)) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, nc, gs, gs), filter_shape=(nkerns[0], nc, ks[0], ks[0]), poolsize=(1, 1)) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], sp1, sp1), filter_shape=(nkerns[1], nkerns[0], ks[1], ks[1]), poolsize=(1, 1)) layer3 = LogisticRegression(input=layer1.output.flatten(2), n_in=nkerns[1]*sp2*sp2, n_out=gs*gs) cost = layer3.negative_log_likelihood(y) params = layer3.params + layer1.params + layer0.params # create a function to compute the mistakes that are made by the model test_model = theano.function([], layer3.errors(y), givens={ x: test_batch_x, y: T.cast(test_batch_y, 'int32')}) validate_model = theano.function([], layer3.errors(y), givens={ x: valid_batch_x, y: T.cast(valid_batch_y, 'int32')}) predictions = theano.function([], layer3.get_predictions(), givens={ x: valid_batch_x}) conditional_dist = theano.function([], layer3.get_conditional_dist(), givens={ x: valid_batch_x}) # create a list of gradients for all model parameters 
grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i],grads[i]) pairs. updates = [] #adjusted_rate = learning_rate - iteration*(learning_rate/(float(n_epochs) * n_train_batches)) adjusted_rate = learning_rate if T.lt(iteration,3000*200) else 0.1*learning_rate for param_i, grad_i in zip(params, grads):#, prev_grad_i , prevGrads): updates.append((param_i, param_i - adjusted_rate * grad_i))# - momentum * prev_grad_i)) #for i,grad in enumerate(grads): # updates.append((prevGrads[i], grad)) train_model = theano.function([iteration], cost, updates=updates, givens={ x: train_batch_x, y: T.cast(train_batch_y, 'int32')},on_unused_input='ignore') ############### # TRAIN MODEL # ############### print '... Training ...' # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.999 # a relative improvement of this much is # considered significant validation_frequency = 10000 # min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf best_iter = 0 test_score = 0. start_time = time.clock() epoch = 0 done_looping = False stime = time.time() while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 1000 == 0: print 'training @ iter = ', iter pickle.dump((updates,cost,layer0,layer1,layer3,test_model,predictions,conditional_dist),open("results/"+str(batch_size)+representation+str(history)+".model","w")) if iter ==5: print 'estimated train time per epoch = '+ str((time.time() - stime) * n_train_batches/60.0/iter/60.0) + " hours" ax,ay = getBatch(trainNames, minibatch_index, trainCumLengths, batch_size,representation,batchType=batch_type,history=history) train_batch_x.set_value(ax) train_batch_y.set_value(ay) cost_ij = train_model(iter) if (iter + 1) % validation_frequency == 0 or iter==5: # compute zero-one loss on validation set validation_losses = [] for i in xrange(n_valid_batches): vx,vy = getBatch(validNames, i, validCumLengths, batch_size,representation,batchType='fast',history=history) valid_batch_x.set_value(vx) valid_batch_y.set_value(vy) validation_losses.append(validate_model()) this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % \ (epoch, minibatch_index + 1, n_train_batches, \ this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses=[] for i in xrange(n_test_batches): tx,ty = getBatch(testNames, i, testCumLengths, batch_size,representation,batchType='fast',history=history) test_batch_x.set_value(tx) test_batch_y.set_value(ty) test_losses.append(test_model()) test_score = numpy.mean(test_losses) print((' epoch 
%i, minibatch %i/%i, test error of best ' 'model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) fw.write("Epoch "+str(epoch) + ": " +str((1-this_validation_loss)*100.)+ "%\n") pickle.dump((updates,cost,layer0,layer1,layer3,test_model,predictions,conditional_dist),open("results/"+str(batch_size)+representation+str(history)+".model","w")) #if patience <= iter: # done_looping = True # break fw.close() end_time = time.clock() print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i,'\ 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
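# --- Worked example (not part of the original code) --------------------------
# The spatial sizes sp1 and sp2 used in the "best" network above follow from
# 'valid' convolutions with 1x1 pooling: each layer shrinks the board by
# (kernel - 1).  With the default kernel_shapes=[7, 5] on a 19x19 board this
# gives 13x13 and then 9x9 feature maps, which is where the hard-coded
# nkerns[1]*9*9 size in prevGrads comes from.
def valid_conv_output_size(input_size, kernel_size, poolsize=1):
    # 'valid' convolution followed by optional non-overlapping max-pooling
    return (input_size - kernel_size + 1) // poolsize

gs_example = 19
ks_example = [7, 5]
sp1_example = valid_conv_output_size(gs_example, ks_example[0])   # 13
sp2_example = valid_conv_output_size(sp1_example, ks_example[1])  # 9
print sp1_example, sp2_example, 'flattened size:', 15 * sp2_example * sp2_example  # with nkerns[1] = 15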
def computeAcc(pred, y):
    # 1 if the argmax of the prediction vector matches the integer label, else 0
    return 1 if numpy.argmax(pred) == y else 0


# Load the pickled model tuple dumped during training, plus the cached
# cumulative game lengths.
model = pickle.load(open("results/1raw0.model", "r"))
(updates, cost, layer0, layer1, layer3, test_model, predictions, conditional_dist) = model
(trainCumLengths, validCumLengths, testCumLengths, filenames) = pickle.load(open("results/lengths.cache", 'r'))

fn = filenames[:2000]
fncl = trainCumLengths[:2000]
batch_size = 1

valid_batch_x, valid_batch_y = utils.shared_dataset(readGame.processSGFs(filenames[:6], 'raw'))
test_batch_x, test_batch_y = utils.shared_dataset(readGame.processSGFs(filenames[:6], 'raw'))

c = 0
while (True):
    c += 1
    # Read the contents of the "c2py" exchange file written by an external process.
    fr = open("c2py", "r")
    txt = fr.read()
    fr.close()
    if txt == '':
        print "EMPTY INPUT"
        raise IOError
    #print "INPUT= '"+ txt + "'"
    #print "txt = " + txt
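# --- Illustrative check (not part of the original code) ----------------------
# computeAcc() above scores a single prediction: it returns 1 when the argmax of
# the prediction vector equals the integer label, 0 otherwise.  The values below
# are made up purely to show the convention, and reuse the module-level numpy
# import assumed by computeAcc().
example_pred = numpy.array([0.1, 0.7, 0.2])
print computeAcc(example_pred, 1)   # 1: class 1 has the highest score
print computeAcc(example_pred, 2)   # 0: class 2 does not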
def train(self, data=None, presences=None, **kwargs): """ Pretrain the MLP on the patches of images. """ learning_rate = kwargs["learning_rate"] L1_reg = kwargs["L1_reg"] L2_reg = kwargs["L2_reg"] n_epochs = kwargs["nepochs"] cost_type = kwargs["cost_type"] save_exp_data = kwargs["save_exp_data"] batch_size = kwargs["batch_size"] normalize_weights = kwargs["normalize_weights"] presences = numpy.asarray(presences.tolist(), dtype="uint8") self.learning_rate = learning_rate # Assign the state of MLP: self.state = "train" if data is None or presences is None: raise Exception( "Dataset or presences for pretraining can't be None.") if data.shape[0] != presences.shape[0]: raise Exception("Dataset and presences shape mismatch.") train_set_patches = shared_dataset(data, name="train_set_x") train_set_pre = shared_dataset(presences, name="train_set_pre") train_set_pre = T.cast(train_set_pre, "int32") # compute number of minibatches for training, validation and testing n_train_batches = int(math.ceil(data.shape[0] / batch_size)) if self.output == 1 or self.output == 2: pre_train_probs =\ numpy.zeros((data.shape[0], self.n_out)) else: pre_train_probs =\ numpy.zeros((data.shape[0], self.n_out * self.no_of_patches)) ###################### # Pretrain the MODEL # ###################### print '... pretraining the model' # allocate symbolic variables for the data index = T.lscalar('index') # index to a [mini]batch y = T.ivector( 'y') # the labels are presented as 1D vector of presences pindex = T.lscalar('pindex') #construct the MLP class # the cost we minimize during training is the negative log likelihood of # the model plus the regularization terms (L1 and L2); cost is expressed # here symbolically. cost = self.get_cost_function(cost_type, y, L1_reg, L2_reg) p_y_given_x = self.class_memberships updates = self.sgd_updates(cost, learning_rate) # compiling a Theano function `train_model` that returns the cost, butx # in the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index, pindex], outputs=[cost, p_y_given_x], updates=updates, givens={ self.input: train_set_patches[index * batch_size:(index + 1) * batch_size, pindex], y: train_set_pre[index * batch_size:(index + 1) * batch_size, pindex] }) epoch = 0 costs = [] Ws = [] while (epoch < n_epochs): epoch_costs = [] if normalize_weights: if epoch != 0: self.normalize_weights() if not self.quiet: print "Training epoch %d has started." % (epoch) for minibatch_index in xrange(n_train_batches): minibatch_costs = [] for pidx in xrange(self.no_of_patches): minibatch_avg_cost, membership_probs = train_model( minibatch_index, pidx) minibatch_costs.append(float(minibatch_avg_cost.tolist())) if self.output == 1: pre_train_probs[minibatch_index * batch_size:\ (minibatch_index + 1) * batch_size] += membership_probs if self.output == 2: pre_train_probs[minibatch_index * batch_size:\ (minibatch_index + 1) * batch_size] *= 10 * membership_probs else: pre_train_probs[minibatch_index * batch_size:(minibatch_index + 1) * batch_size, pidx * self.n_out:(pidx + 1) * self.n_out] = membership_probs if self.output == 2: pre_train_probs = numpy.sqrt(pre_train_probs) Ws.append(self.params[2]) epoch_costs.append(minibatch_costs) costs.append(epoch_costs) if not self.quiet: print "Normalizing the weights" epoch += 1 self.data_dict['costs'].append([costs]) self.data_dict['train_probs'].append(pre_train_probs) return costs, pre_train_probs
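# --- Illustrative sketch (not part of the original code) ---------------------
# self.sgd_updates(cost, learning_rate) above returns the parameter-update list
# handed to theano.function.  A common way such a helper is written is shown
# below; this is an assumption about its shape, not the class's actual method.
import theano.tensor as T

def sgd_updates(cost, params, learning_rate):
    grads = T.grad(cost, params)
    # one (shared_variable, new_value) pair per parameter
    return [(p, p - learning_rate * g) for p, g in zip(params, grads)]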
def test_data_augmentation(learning_rate=0.01,L1_reg=0.00, L2_reg=0.0001, n_epochs=100,batch_size=128, n_hidden=500, n_hiddenLayers=3,verbose=False,steps = 1): """ Wrapper function for experiment of data augmentation :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient. :type L1_reg: float :param L1_reg: L1-norm's weight when added to the cost (see regularization). :type L2_reg: float :param L2_reg: L2-norm's weight when added to the cost (see regularization). :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer. :type batch_size: int :param batch_szie: number of examples in minibatch. :type n_hidden: int or list of ints :param n_hidden: number of hidden units. If a list, it specifies the number of units in each hidden layers, and its length should equal to n_hiddenLayers. :type n_hiddenLayers: int :param n_hiddenLayers: number of hidden layers. :type verbose: boolean :param verbose: to print out epoch summary or not to. :type smaller_set: boolean :param smaller_set: to use the smaller dataset or not to. """ rng = numpy.random.RandomState(23455) # Load down-sampled dataset in raw format (numpy.darray, not Theano.shared) # train_set, valid_set, test_set format: tuple(input, target) # input is a numpy.ndarray of 2 dimensions (a matrix), where each row # corresponds to an example. target is a numpy.ndarray of 1 dimension # (vector) that has the same length as the number of rows in the input. # Load the smaller dataset in raw Format, since we need to preprocess it train_set, valid_set, test_set = load_data(ds_rate=5, theano_shared=False) # Repeat the training set 5 times train_set[1] = numpy.tile(train_set[1], 5) # TODO: translate the dataset train_set_x_u = translate_image(train_set[0],'top',steps) train_set_x_d = translate_image(train_set[0],'bottom',steps) train_set_x_r = translate_image(train_set[0],'right',steps) train_set_x_l = translate_image(train_set[0],'left',steps) # Stack the original dataset and the synthesized datasets train_set[0] = numpy.vstack((train_set[0], train_set_x_u, train_set_x_d, train_set_x_r, train_set_x_l)) # Convert raw dataset to Theano shared variables. test_set_x, test_set_y = shared_dataset(test_set) valid_set_x, valid_set_y = shared_dataset(valid_set) train_set_x, train_set_y = shared_dataset(train_set) # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print('... 
building the model') # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels rng = numpy.random.RandomState(1234) classifier = myMLP( rng=rng, input=x, n_in=32*32*3, n_hidden=n_hidden, n_hiddenLayers=n_hiddenLayers, n_out=10 ) # the cost we minimize during training is the negative log likelihood of # the model plus the regularization terms (L1 and L2); cost is expressed # here symbolically cost = ( classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr ) # compiling a Theano function that computes the mistakes that are made # by the model on a minibatch test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] } ) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] } ) # compute the gradient of cost with respect to theta (sotred in params) # the resulting gradients will be stored in a list gparams gparams = [T.grad(cost, param) for param in classifier.params] # specify how to update the parameters of the model as a list of # (variable, update expression) pairs # given two lists of the same length, A = [a1, a2, a3, a4] and # B = [b1, b2, b3, b4], zip generates a list C of same size, where each # element is a pair formed from the two lists : # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)] updates = [ (param, param - learning_rate * gparam) for param, gparam in zip(classifier.params, gparams) ] # compiling a Theano function `train_model` that returns the cost, but # in the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) ############### # TRAIN MODEL # ############### print('... training') train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
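# --- Illustrative sketch (not part of the original code) ---------------------
# test_data_augmentation() above relies on a translate_image() helper that is
# defined elsewhere.  The sketch below shows one plausible implementation; the
# channel-major (3, 32, 32) layout and the zero-filling of vacated pixels are
# assumptions, not taken from the original helper.
import numpy

def translate_image_sketch(flat_images, direction, steps=1):
    n = flat_images.shape[0]
    imgs = flat_images.reshape(n, 3, 32, 32)
    out = numpy.zeros_like(imgs)
    if direction == 'top':        # move content up; bottom rows become zero
        out[:, :, :-steps, :] = imgs[:, :, steps:, :]
    elif direction == 'bottom':   # move content down
        out[:, :, steps:, :] = imgs[:, :, :-steps, :]
    elif direction == 'left':     # move content left
        out[:, :, :, :-steps] = imgs[:, :, :, steps:]
    elif direction == 'right':    # move content right
        out[:, :, :, steps:] = imgs[:, :, :, :-steps]
    return out.reshape(n, 3 * 32 * 32)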
def evaluate_lenet(learning_rate=0.1, n_epochs=200, nkerns=[20, 50], batch_size=500): rng = np.random.RandomState(12345) print("Loading datasets...") train_set, valid_set, test_set = utils.load_MNIST() train_set_X, train_set_y = utils.shared_dataset(train_set) valid_set_X, valid_set_y = utils.shared_dataset(valid_set) test_set_X, test_set_y = utils.shared_dataset(test_set) # we cut data to batches so that we can efficiently load them to # GPU (if needed) n_train_batches = train_set_X.get_value(borrow=True).shape[0] n_valid_batches = valid_set_X.get_value(borrow=True).shape[0] n_test_batches = test_set_X.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size index = T.lscalar() # index to batches X = T.matrix("X") y = T.ivector("y") print("Building the model...") # now we construct a 4-layer CNN # our inputs are 28*28 images with only one feature map, so we # reshape it to (batch_size, 1, 28, 28) layer0_input = X.reshape((batch_size, 1, 28, 28)) # layer0: convolution+max-pooling layer layer0 = layers.ConvPoolLayer( rng=rng, input=layer0_input, input_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2) ) # layer1: convolution+max-pooling layer layer1 = layers.ConvPoolLayer( rng=rng, input=layer0.output, input_shape=layer0.output_shape, filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2) ) # layer2: fully-connected hidden layer layer2 = layers.MLPLayer( rng=rng, input=layer1.output.flatten(2), n_in=np.prod(layer1.output_shape[1:]), n_out=layer1.output_shape[0], activation=T.tanh ) # layer3: logistic regression layer3 = layers.LogisticRegression(input=layer2.output, n_in=batch_size, n_out=10) cost = layer3.negative_log_likelihood(y) # construct functions to compute errors on test/validation sets valid_error = theano.function( [index], layer3.errors(y), givens={ X: valid_set_X[index*batch_size:(index+1)*batch_size], y: valid_set_y[index*batch_size:(index+1)*batch_size] } ) test_error = theano.function( [index], layer3.errors(y), givens={ X: test_set_X[index*batch_size:(index+1)*batch_size], y: test_set_y[index*batch_size:(index+1)*batch_size] } ) # a list of all parameters in this model params = layer0.params + layer1.params + layer2.params + layer3.params grads = T.grad(cost, params) # parameter update rule in stochastic gradient descent updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)] train_model = theano.function( [index], cost, updates=updates, givens={ X: train_set_X[index*batch_size:(index+1)*batch_size], y: train_set_y[index*batch_size:(index+1)*batch_size] } ) predict_model = theano.function([X], layer3.output) print("Training...") # we use the early-stopping strategy patience = 10000 patience_increase = 2 improvement_threshold = 0.995 validation_frequency = min(n_train_batches, patience // 2) best_validation_score = 0. best_iter = 0 test_score = 0. 
    start_time = timeit.default_timer()

    epoch = 0
    done = False
    while (epoch < n_epochs) and (not done):
        epoch += 1
        for minibatch_index in range(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if iter % 100 == 0:
                print("iter =", iter)
            train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:
                valid_errors = [valid_error(i) for i in range(n_valid_batches)]
                score = 1 - np.mean(valid_errors)
                print('epoch {}, minibatch {}/{}, validation accuracy {}'
                      .format(epoch, minibatch_index + 1, n_train_batches, score))

                if score > best_validation_score:
                    # increase patience if the improvement over the previous best
                    # is large enough (checked before overwriting the best score)
                    if (1 - score) < \
                            (1 - best_validation_score) * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_score = score
                    best_iter = iter

                    # test it on test set
                    test_errors = [test_error(i) for i in range(n_test_batches)]
                    test_score = 1 - np.mean(test_errors)
                    print('    test score:', test_score)

                    # store best model to file
                    with open('tmp/best_cnn.pkl', 'wb') as f:
                        pickle.dump((predict_model, batch_size), f)

            if patience <= iter:
                done = True
                break  # break the batches loop

    end_time = timeit.default_timer()
    print('Finished training. Total time:', (end_time - start_time) / 60, 'min')
    print('Best validation score of', best_validation_score, 'obtained at iter', best_iter)
    print('Test score of the best model:', test_score)
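# --- Illustrative sketch (not part of the original code) ---------------------
# Re-using the checkpoint written above.  The pickle stores the compiled
# predict_model function together with the batch size it was compiled for, so
# inputs have to be fed in full batches; treating layer3.output as per-class
# scores is an assumption made for this example.
import pickle
import numpy as np
import theano
import utils

with open('tmp/best_cnn.pkl', 'rb') as f:
    predict_model, saved_batch_size = pickle.load(f)

_, _, test_set = utils.load_MNIST()
test_X, test_y = test_set

scores = predict_model(test_X[:saved_batch_size].astype(theano.config.floatX))
pred = np.argmax(scores, axis=1)
print('accuracy on the first batch:', np.mean(pred == test_y[:saved_batch_size]))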
def train(learning_rate=0.1, n_epochs=100, batch_size=320, batch_type = 'fast', mynet = 'one', representation='raw', momentum=0, history=0): rng = numpy.random.RandomState(42) trainP = 0.8 validP = 0.1 testP = 0.1 # print "... Reading cached values ..." # (trainCumLengths,validCumLengths,testCumLengths,filenames) = pickle.load(open("results/5x5.cache",'r')) print "... Getting filenames ..." datasetMY = "../MC player/20kgames9" fn1 = readGame.getFilenames(datasetMY,1,0,1)[0] random.shuffle(fn1) filenames = fn1 n = len(filenames) print "... Learning set contains " + str(n) + " games" print "... Computing cumulative game lengths ..." trainNames = filenames[:int(trainP*n)] validNames = filenames[int(trainP*n):int(trainP*n+validP*n)] testNames = filenames[int(trainP*n+validP*n):int(trainP*n+validP*n+testP*n)] random.shuffle(trainNames) trainCumLengths = readGame.getCumGameLengths(trainNames,ftype="game") validCumLengths = readGame.getCumGameLengths(validNames,ftype="game") testCumLengths = readGame.getCumGameLengths(testNames,ftype="game") fw = open("results/"+str(gs)+"x"+str(gs)+".cache","wb") pickle.dump((trainCumLengths,validCumLengths,testCumLengths,filenames),fw) fw.close() print "... Preprocessing initial batches ..." minn = batch_size / 10 +1 temp = time.time() test_batch_x, test_batch_y = utils.shared_dataset(readGame.processGAMEs(testNames[:minn],representation,gs=gs),batch_size=batch_size,board_size=gs) train_batch_x, train_batch_y = utils.shared_dataset(readGame.processGAMEs(trainNames[:minn],representation,gs=gs),batch_size=batch_size,board_size=gs) valid_batch_x, valid_batch_y = utils.shared_dataset(readGame.processGAMEs(validNames[:minn],representation,gs=gs),batch_size=batch_size,board_size=gs) print " average processing time per game: " + str((time.time()-temp)/18.0) + " seconds, per epoch: " + str(int((time.time()-temp)/18*n/60/60)) + " hours" # compute number of minibatches for training, validation and testing n_train_batches = trainCumLengths[-1] n_valid_batches = validCumLengths[-1] n_test_batches = testCumLengths[-1] n_train_batches /= batch_size n_valid_batches /= batch_size n_test_batches /= batch_size # allocate symbolic variables for the data iteration = T.lscalar() # iteration number of a minibatch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ishape = (gs, gs) # this is the size of MNIST images fw = open("results/"+mynet+"_"+str(learning_rate)+"_"+".res","w") ###################### # BUILD ACTUAL MODEL # ###################### print '... Building the model ...' 
nc = 2 if representation=='raw' else 6 # if raw nc *= 1+history if mynet == "zero": layer0_input = x.reshape((batch_size, nc, gs, gs)) layer0 = LogisticRegression(input=layer0_input.flatten(2), n_in=nc*gs*gs, n_out=gs*gs) cost = layer0.negative_log_likelihood(y) params = layer0.params if mynet == "one": nHiddens = 500 layer1_input = x.reshape((batch_size, nc, gs, gs)) layer1 = HiddenLayer(rng, input=layer1_input.flatten(2), n_in=nc * gs * gs, n_out=nHiddens, activation=T.tanh) layer0 = LogisticRegression(input=layer1.output, n_in=nHiddens, n_out=gs*gs) cost = layer0.negative_log_likelihood(y) params = layer0.params + layer1.params # create a function to compute the mistakes that are made by the model test_model = theano.function([], layer0.errors(y), givens={ x: test_batch_x, y: T.cast(test_batch_y, 'int32')}) validate_model = theano.function([], layer0.errors(y), givens={ x: valid_batch_x, y: T.cast(valid_batch_y, 'int32')}) predictions = theano.function([], layer0.get_predictions(), givens={ x: valid_batch_x}) conditional_dist = theano.function([], layer0.get_conditional_dist(), givens={ x: valid_batch_x}) # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i],grads[i]) pairs. updates = [] #adjusted_rate = learning_rate - iteration*(learning_rate/(float(n_epochs) * n_train_batches)) adjusted_rate = learning_rate if T.lt(iteration,3000*200) else 0.1*learning_rate for param_i, grad_i in zip(params, grads):#, prev_grad_i , prevGrads): updates.append((param_i, param_i - adjusted_rate * grad_i))# - momentum * prev_grad_i)) #for i,grad in enumerate(grads): # updates.append((prevGrads[i], grad)) train_model = theano.function([iteration], cost, updates=updates, givens={ x: train_batch_x, y: T.cast(train_batch_y, 'int32')},on_unused_input='ignore') ############### # TRAIN MODEL # ############### print '... Training ...' # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.999 # a relative improvement of this much is # considered significant validation_frequency = 2000 # min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
start_time = time.clock() epoch = 0 done_looping = False stime = time.time() while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 500 == 0: print 'training @ iter = ', iter pickle.dump((updates,cost,layer0,test_model,predictions,conditional_dist),open("results/"+str(batch_size)+representation+str(history)+".model","w")) if iter ==5: print 'estimated train time per epoch = '+ str((time.time() - stime) * n_train_batches/60.0/iter/60.0) + " hours" ax,ay = getBatch(trainNames, minibatch_index, trainCumLengths, batch_size,representation,batchType=batch_type,history=history) train_batch_x.set_value(ax) train_batch_y.set_value(ay) cost_ij = train_model(iter) if (iter + 1) % validation_frequency == 0 or iter==5: # compute zero-one loss on validation set validation_losses = [] for i in xrange(n_valid_batches): vx,vy = getBatch(validNames, i, validCumLengths, batch_size,representation,batchType='fast',history=history) valid_batch_x.set_value(vx) valid_batch_y.set_value(vy) validation_losses.append(validate_model()) this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % \ (epoch, minibatch_index + 1, n_train_batches, \ this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses=[] for i in xrange(n_test_batches): tx,ty = getBatch(testNames, i, testCumLengths, batch_size,representation,batchType='fast',history=history) test_batch_x.set_value(tx) test_batch_y.set_value(ty) test_losses.append(test_model()) test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of best ' 'model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) #fw.write("Epoch "+str(epoch) + ": " +str((1-this_validation_loss)*100.)+ "%\n") pickle.dump((updates,cost,layer0,test_model,predictions,conditional_dist),open("results/"+str(batch_size)+representation+str(history)+".model","w")) #if patience <= iter: # done_looping = True # break fw.close() end_time = time.clock() print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i,'\ 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
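# --- Illustrative sketch (not part of the original code) ---------------------
# Both training functions keep a commented-out prevGrads list, which suggests a
# classical-momentum update was planned but never enabled.  A minimal hedged
# sketch of such an update rule in Theano is given below; the plain SGD updates
# actually used above remain unchanged.
import numpy
import theano
import theano.tensor as T

def momentum_updates(params, grads, learning_rate, momentum=0.9):
    updates = []
    for param, grad in zip(params, grads):
        # one velocity buffer per parameter, same shape and dtype
        prev_step = theano.shared(
            numpy.zeros(param.get_value(borrow=True).shape,
                        dtype=theano.config.floatX),
            borrow=True)
        step = momentum * prev_step - learning_rate * grad
        updates.append((prev_step, step))
        updates.append((param, param + step))
    return updates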