def predict_webcam_image(classifier,x,y,timegap = .1):
	import cv2
	cap = cv2.VideoCapture(0)
	while(True):
		# Capture frame-by-frame
		ret, frame = cap.read()

		# Our operations on the frame come here
		gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

		#resize the image to 48x48 
		res = cv2.resize(gray,(48,48), interpolation = cv2.INTER_CUBIC)

		# Display the resulting frames
		cv2.imshow('frame',gray)
		cv2.imshow('resized',res)

		input_image = numpy.reshape(res,(1,48*48))

		#Close the frame when 'q' key is pressed.
		if cv2.waitKey(1) & 0xFF == ord('q'):
			break



		# normalise pixel values to [0, 1]; the division must be floating point
		input_image = input_image / 255.0
		temp = numpy.asarray([1])
		input_image_x,input_image_y = shared_dataset([input_image,temp])
		test_input_example = theano.function(
			inputs= [],
			outputs=[classifier.errors(y),classifier.p_y_given_x],
			givens={
				x: input_image_x,
				y: input_image_y
				}
			)

		error, input_image_output_probilities = test_input_example()

		print 'probabilities : ',input_image_output_probilities
		print 'Emotion predicted :', emotionDictionary[numpy.argmax(input_image_output_probilities)]
		print 'with probability :',numpy.max(input_image_output_probilities)*100
		print '-------------------------------------'

		time.sleep(timegap)

	# When everything done, release the capture
	cap.release()
	cv2.destroyAllWindows()
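
# `shared_dataset` and `emotionDictionary` are assumed to be defined elsewhere in
# this project (along with the module-level `import numpy`, `import theano`,
# `import theano.tensor as T` and `import time` the code above relies on).
# A minimal sketch of what they presumably look like is given below; the
# `_sketch` names and the emotion labels are assumptions based on the classic
# Theano tutorials and the 7-class FER setup, not the project's actual code.
# Later examples in this file use project-specific variants of `shared_dataset`
# with different signatures (extra `name=` or separate x/y arguments).
def shared_dataset_sketch(data_xy, borrow=True):
    # Load a (data, labels) pair into Theano shared variables so minibatches can
    # be sliced on the GPU; labels are cast to int32 for use as class indices.
    data_x, data_y = data_xy
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
                             borrow=borrow)
    return shared_x, T.cast(shared_y, 'int32')

# Hypothetical label map for the 7 facial-expression classes predicted above.
emotionDictionary_sketch = {0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
                            4: 'sad', 5: 'surprise', 6: 'neutral'}
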
def load_model(filename):
    f = open(filename, 'rb') 
    filecontent= f.read()
    loaded_obj = json.loads(filecontent)
    f.close()
    print 'loaded'

    parameters_shared = loaded_obj['p']
    learning_rate = loaded_obj['learning_rate']
    n_epochs= loaded_obj['n_epochs'] 
    nkerns= loaded_obj['nkerns']
    batch_size= 1 
    verbose= loaded_obj['verbose']
    filterwidth_layer0= loaded_obj['filterwidth_layer0']
    filterheight_layer0= loaded_obj['filterheight_layer0']
    poolsize_layer0= loaded_obj['poolsize_layer0']
    filterwidth_layer1= loaded_obj['filterwidth_layer1']
    filterheight_layer1= loaded_obj['filterheight_layer1']
    poolsize_layer1= loaded_obj['poolsize_layer1']
    filterwidth_layer2= loaded_obj['filterwidth_layer2']
    filterheight_layer2= loaded_obj['filterheight_layer2']
    poolsize_layer2= loaded_obj['poolsize_layer2']
    neurons_hidden = loaded_obj['neurons_hidden']
    smaller_set= loaded_obj['smaller_set']


    parameters = []

    for p in parameters_shared:
        p1 = []
        for row in p:
            p1.append(numpy.asarray(row))
        parameters.append(numpy.asarray(p1))

    rng = numpy.random.RandomState(23455)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 1 * 48 * 48)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 48, 48))


    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape= (batch_size, 1, 48, 48),
        filter_shape= (nkerns[0],1,filterwidth_layer0,filterheight_layer0),
        W= parameters[-2],
        b=parameters[-1],
        poolsize= (poolsize_layer0,poolsize_layer0)
    )
    
    print '-------------------------------------------------------------------------------------------- \n'
    layer0_outputwidth,layer0_outputheight = ( (48-filterwidth_layer0+1)/poolsize_layer0,(48-filterheight_layer0+1)/poolsize_layer0 )
    print 'Layer0 built. Shape of feature map :',layer0_outputwidth, layer0_outputheight, 'Number of feature maps : ',nkerns[0]
    
    print '-------------------------------------------------------------------------------------------- \n'
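    # Worked example of the feature-map arithmetic above (values assumed purely
    # for illustration): with a 48x48 input, a 5x5 filter and 2x2 pooling, the
    # valid convolution gives 48 - 5 + 1 = 44 and pooling halves it to 22, i.e.
    # layer0_outputwidth = layer0_outputheight = 22.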
    
    
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size,nkerns[0],layer0_outputwidth,layer0_outputheight),
        filter_shape= (nkerns[1],nkerns[0],filterwidth_layer1,filterheight_layer1),
        W= parameters[-4],
        b=parameters[-3],
        poolsize=(poolsize_layer1,poolsize_layer1)
 
    )

    layer1_outputwidth,layer1_outputheight = (layer0_outputwidth-filterwidth_layer1+1)/poolsize_layer1,(layer0_outputheight-filterheight_layer1+1)/poolsize_layer1
    print 'Layer1 built. Shape of feature map :',layer1_outputwidth,layer1_outputheight, 'Number of feature maps : ',nkerns[1]
    
    print '-------------------------------------------------------------------------------------------- \n'
    poolsize_width_layer0_to_layer1 = layer0_outputwidth/layer1_outputwidth
    poolsize_height_layer0_to_layer1 = layer0_outputheight/layer1_outputheight
    print 'poolsize layer 0 o/p to layer 1 o/p width :',layer0_outputwidth/layer1_outputwidth
    print 'poolsize layer 0 o/p to layer 1 o/p height :',layer0_outputheight/layer1_outputheight
    
    
    layer0_output_ds = downsample.max_pool_2d(
            input=layer0.output,
            ds=(poolsize_width_layer0_to_layer1,poolsize_height_layer0_to_layer1), # TODO: change ds
            ignore_border=True
    )

    # concatenate layer
    print 'max pool layer created. between output of layer0 and output of layer1. output of this max pool layer : ',layer0_outputwidth/poolsize_width_layer0_to_layer1,layer0_outputheight/poolsize_height_layer0_to_layer1
    print '-------------------------------------------------------------------------------------------- \n'
    layer2_input = T.concatenate([layer1.output, layer0_output_ds], axis=1)
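    # The concatenation is along axis=1 (the feature-map axis), so layer2 sees
    # nkerns[1] maps from layer1 stacked with nkerns[0] pooled maps from layer0,
    # all at (roughly) the same spatial resolution of layer1's output.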

    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer2_input,
        image_shape= (batch_size,nkerns[0]+nkerns[1],layer1_outputwidth,layer1_outputheight),
        filter_shape= (nkerns[2],nkerns[0]+nkerns[1],filterwidth_layer2,filterheight_layer2),
        W= parameters[-6],
        b=parameters[-5],
        poolsize=(poolsize_layer2,poolsize_layer2)        
    )
    
    print 'Number of input feature maps to Layer2 (layer1 output maps plus pooled layer0 maps) : ', nkerns[0]+nkerns[1]
    layer2_outputwidth,layer2_outputheight = (layer1_outputwidth-filterwidth_layer2+1)/poolsize_layer2,(layer1_outputheight-filterheight_layer2+1)/poolsize_layer2
    print 'Layer2 built. Shape of feature map :',layer2_outputwidth,layer2_outputheight, 'Number of feature maps : ',nkerns[2]

    print '-------------------------------------------------------------------------------------------- \n'
    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 1 * 1).
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=nkerns[2] * layer2_outputwidth * layer2_outputheight,
        n_out= neurons_hidden,
        W= parameters[-8],
        b=parameters[-7],
        activation=T.tanh
    )
    
    print 'MLP Layer created. Input neurons : ',nkerns[2] * layer2_outputwidth * layer2_outputheight, ' Output neurons :',neurons_hidden
    print '-------------------------------------------------------------------------------------------- \n'
    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output,
        n_in= neurons_hidden,
        n_out=7,
        W= parameters[-10],
        b=parameters[-9])

    print 'Logistic Layer created. Input neurons : ',neurons_hidden, ' output neurons :',7
  
    print '-------------------------------------------------------------------------------------------- \n'
    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)
    
    
 
    print 'Model Created...'
    ###############
    # MAKE PREDICTION #
    ###############
    import cv2
    cap = cv2.VideoCapture(0)

    while(True):
        # Capture frame-by-frame
        ret, frame = cap.read()

        # Our operations on the frame come here
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        #resize the image to 48x48 
        res = cv2.resize(gray,(48,48), interpolation = cv2.INTER_CUBIC)

        # Display the resulting frames
        cv2.imshow('frame',gray)
        cv2.imshow('resized',res)

        input_image = numpy.reshape(res,(1,48*48))
        batch_size = 1
        #Close the frame when 'q' key is pressed.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        


        # normalise pixel values to [0, 1]; the division must be floating point
        input_image = input_image / 255.0
        temp = numpy.asarray([1])
        input_image_x,input_image_y = shared_dataset([input_image,temp])

        test_input_example = theano.function(
            inputs= [],
            outputs=[layer4.errors(y),layer4.p_y_given_x],
            givens={
                x: input_image_x,
                y: input_image_y
                }
            )

        error, input_image_output_probilities = test_input_example()

        print 'probabilities : ',input_image_output_probilities
        print 'Emotion predicted :', emotionDictionary[numpy.argmax(input_image_output_probilities)]
        print 'with probability :',numpy.max(input_image_output_probilities)*100
        print '-------------------------------------'
        
        time.sleep(0)
Example #3
def test_mlp_with_new_functionality(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100,batch_size=128, n_hidden=500, n_hiddenLayers=3,verbose=False, smaller_set=True,example_index = 0,adversarial_parameter = 0.01,distribution = 'constant'):
    """
    Wrapper function for training and testing MLP

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layers, and its length should equal to
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type smaller_set: boolean
    :param smaller_set: to use the smaller dataset or not to.

    """

    # load the dataset; download the dataset if it is not present
    train_set, valid_set, test_set = load_data(ds_rate=5, theano_shared=False)

    example_x = test_set[0][example_index:example_index+1]
    example_y = test_set[1][example_index:example_index+1]

    # example_x_reshape = numpy.reshape(example_x,(len(example_x),1))
    # example_y_reshape = numpy.reshape(example_y,(1,1))

    shared_example_x,shared_example_y = shared_dataset([example_x,example_y])
    # shared_example_x = shared_example_x.reshape(shared_example_x.reshape[0],-1)

    # shared_example_x = theano.shared(type =theano.tensor.matrix,value = numpy.asarray(example_x,dtype=theano.config.floatX),borrow = True)

    # shared_example_y  = theano.shared(type = theano.tensor.vector, value = numpy.asarray(example_y,dtype=theano.config.floatX),borrow = True)
    # Convert raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    
    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # print ' shapes of the shared_examples', shared_example_y,shared_example_x

    print 'n_train_batches : ',n_train_batches
    print 'n_valid_batches : ',n_valid_batches
    print 'n_test_batches : ',n_test_batches
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    # TODO: construct a neural network, either MLP or CNN.
    # input, n_in, n_hidden, n_out, n_hiddenLayers
    classifier = myMLP(rng, input = x, n_in =3072 , n_hidden = n_hidden, n_out= 10, n_hiddenLayers= n_hiddenLayers)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )


    test_example = theano.function(
        inputs= [index],
        outputs=[classifier.errors(y),classifier.p_y_given_x],
        givens={
            x: test_set_x[index * 1:(index + 1) * 1],
            y: test_set_y[index * 1:(index + 1) * 1]
            }
        )

    test_example2 = theano.function(
        inputs= [],
        outputs=[classifier.errors(y),classifier.p_y_given_x],
        givens={
            x: shared_example_x,
            y: shared_example_y
            }
        )



    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )



    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs


    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]
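    # Each pair implements one step of vanilla SGD,
    # param <- param - learning_rate * dJ/dparam, applied to every parameter.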

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )



    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)

    ##################
    # Performing adversarial example testing# 
    ##################


    print '-------------------------------------'
    print 'example x :', example_x
    image = getImage(test_set[0][example_index])
    plt.figure(1)
    plt.imshow(image)

    print 'example y :', example_y
    print '-------------------------------------'

    classification, probabilities = test_example(example_index)

    if int(classification) == 0:
        print 'Correct classification performed :', True
    else:
        print 'Correct classification performed :', False
    
    print 'probabilities : ',probabilities
    print 'Number predicted :', numpy.argmax(probabilities)
    print 'with probability :',numpy.max(probabilities)*100
    print '-------------------------------------'

    gadversarial = [T.grad(cost, x)]

    grad_cost_wrt_x = theano.function(
        inputs= [index],
        outputs=gadversarial,
        givens={
            x: test_set_x[index * 1:(index + 1) * 1],
            y: test_set_y[index * 1:(index + 1) * 1]
            }
        )

        
    print 'Creating adversarial example and trying to get results for that ...  \n \n \n '
    print '-------------------------------------'

    gradient_cost_wrt_x = grad_cost_wrt_x(example_index)

    gradient_sign =  numpy.sign(gradient_cost_wrt_x)

    gradient_sign = numpy.reshape(gradient_sign,(1,3072))

    adversarial_example_x = example_x + adversarial_parameter*gradient_sign
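    # The update above is the fast gradient sign method:
    # x_adv = x + epsilon * sign(dJ/dx), with adversarial_parameter playing the
    # role of epsilon and the NLL cost playing the role of J.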

    input_image_x,input_image_y = shared_dataset([adversarial_example_x,example_y])

    test_input_example = theano.function(
        inputs= [],
        outputs=[classifier.errors(y),classifier.p_y_given_x],
        givens={
            x: input_image_x,
            y: input_image_y
            }
        )

    adversarial_classification, input_image_output_probilities = test_input_example()

    if int(adversarial_classification) == 0:
        print 'Correct adversarial classification performed :', True
    else:
        print 'Correct adversarial classification performed :', False
    
    image2 = getImage(adversarial_example_x)    
    plt.figure(2)
    plt.imshow(image2)
    

    print 'probabilities : ',input_image_output_probilities
    print 'Number predicted :', numpy.argmax(input_image_output_probilities)
    print 'with probability :',numpy.max(input_image_output_probilities)*100
    print '-------------------------------------'
Example #4
    return 1.0/ sum(map(lambda p: 1 if p>=probs[y] else 0,probs))
   
def computeAcc(pred,y):
    return 1 if numpy.argmax(pred)==y else 0
  
    
model=pickle.load(open("../convolutional/results/100raw4.model","r"))
(updates,cost,layer0,layer1,layer3,test_model,predictions,conditional_dist) = model


(trainCumLengths,validCumLengths,testCumLengths,filenames) = pickle.load(open("../convolutional/results/lengths.cache",'r'))
fn = filenames[:1000]
fncl = trainCumLengths[:1000]
batch_size = 1

valid_batch_x, valid_batch_y = utils.shared_dataset(readGame.processGAMEs(filenames[:6],'raw'))
test_batch_x, test_batch_y = utils.shared_dataset(readGame.processGAMEs(filenames[:6],'raw'))

    game # get game
    # set batch size to 1
    vx = utils.shared_dataset(game,representation='raw')
    #vx,vy = my_net.getBatch(fn, i, fncl, batch_size,'raw',batchType='fast',history=0)
    valid_batch_x.set_value(vx)
    
    #conds=numpy.array(conditional_dist())
    #move= numpy.argmax(conds)
    move = predictions()[0]
    move= utils.move2fuego(move)
    rets.append(move)
    fw = open("py2c","w")
    fw.write(str(move))
Example #5
    data.drop_missing_values()
    # center data AlexNet style
    print 'center alexnet'
    data.center_alexnet()
    # generate test validation split
    train_set_x, valid_set_x, train_set_y, valid_set_y = train_test_split(
        data.X, data.y, test_size=0.2, random_state=42)
    # change type and load to GPU
    print 'load data to gpu'
    train_set_x = train_set_x.reshape(-1, 1, 96,
                                      96).astype(theano.config.floatX)
    valid_set_x = valid_set_x.reshape(-1, 1, 96,
                                      96).astype(theano.config.floatX)
    train_set_y = train_set_y.astype(theano.config.floatX)
    valid_set_y = valid_set_y.astype(theano.config.floatX)
    train_set_x, train_set_y = shared_dataset(train_set_x, train_set_y)
    valid_set_x, valid_set_y = shared_dataset(valid_set_x, valid_set_y)

    X = T.ftensor4('X')
    y = T.matrix('y')
    net = build_model_vanila_CNN(X, stride=1)
    network = net['prob']
    train_fn, val_fn = build_update_functions(train_set_x, train_set_y,
                                              valid_set_x, valid_set_y,
                                              network, y, X)
    print 'compile done successfully'

    # train the network parameters
    n_iter = 10000
    improvement_threshold = 0.999
    patience = 10000
Example #6
    def test(self, dataset=None, presences=None, **kwargs):
        """
        Test the mlp on the given dataset with the presences.
        """
        save_costs_to_file = kwargs["save_exp_data"]
        batch_size = kwargs["batch_size"]
        save_patch_examples = False

        if kwargs.has_key("save_classified_patches"):
            save_patch_examples = kwargs["save_classified_patches"]

        if dataset is None or presences is None:
            raise Exception(
                "Dataset or presences for pretraining can't be None.")

        self.state = "test"
        test_set_patches = shared_dataset(dataset, name="test_set_x")
        presences = numpy.asarray(presences.tolist(), dtype="int32")
        test_set_pre = shared_dataset(presences, name="test_set_pre")
        test_set_pre = T.cast(test_set_pre, 'int32')

        # compute number of minibatches for training, validation and testing
        n_test_batches = int(math.ceil(dataset.shape[0] / float(batch_size)))

        if self.output == 1 or self.output == 2:
            pre_minitest_probs = numpy.zeros((dataset.shape[0], self.n_out))
        else:
            pre_minitest_probs = numpy.zeros(
                (dataset.shape[0], self.n_out * self.no_of_patches))

        ######################
        # Testing the MODEL. #
        ######################
        print '... pre-testing the model'

        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch
        y = T.ivector(
            'y')  # the labels are presented as 1D vector of presences
        pindex = T.lscalar('pindex')

        p_y_given_x = self.class_memberships

        if save_patch_examples:
            test_model = theano.function(
                inputs=[index, pindex],
                outputs=[
                    self.errors(y), p_y_given_x,
                    self.raw_prediction_errors(y)
                ],
                givens={
                    self.input:
                    test_set_patches[index * batch_size:(index + 1) *
                                     batch_size, pindex],
                    y:
                    test_set_pre[index * batch_size:(index + 1) * batch_size,
                                 pindex]
                })
        else:
            test_model = theano.function(
                inputs=[index, pindex],
                outputs=[self.errors(y), p_y_given_x],
                givens={
                    self.input:
                    test_set_patches[index * batch_size:(index + 1) *
                                     batch_size, pindex],
                    y:
                    test_set_pre[index * batch_size:(index + 1) * batch_size,
                                 pindex]
                })

        test_losses = []
        test_score = 0

        for minibatch_index in xrange(n_test_batches):
            for pidx in xrange(self.no_of_patches):
                if save_patch_examples:
                    test_loss, membership_probs, raw_errors = test_model(
                        minibatch_index, pidx)
                    patches = dataset[minibatch_index *
                                      batch_size:(minibatch_index + 1) *
                                      batch_size, pidx]
                    self.record_classified_examples(patches, raw_errors)
                else:
                    test_loss, membership_probs = test_model(
                        minibatch_index, pidx)

                test_losses.append(test_loss)
                test_score = numpy.mean(test_loss)
                pre_batch_vals = presences[minibatch_index * batch_size:\
                    (minibatch_index + 1) * batch_size, pidx]
                if self.output == 1:
                    pre_minitest_probs[minibatch_index * batch_size:\
                    (minibatch_index + 1) * batch_size] +=\
                    membership_probs
                if self.output == 2:
                    pre_minitest_probs[minibatch_index * batch_size:\
                    (minibatch_index + 1) * batch_size] *=\
                    10 * membership_probs
                else:
                    pre_minitest_probs[minibatch_index * batch_size:\
                    (minibatch_index + 1) * batch_size, pidx * self.n_out:\
                    (pidx + 1) * self.n_out] = membership_probs

                self.logRegressionLayer.update_conf_mat(
                    pre_batch_vals, membership_probs)

                if not self.quiet:
                    print(
                        "Minibatch %i and its test error %f percent on patch %i"
                        % (minibatch_index, test_score * 100, pidx))
            if self.output == 2:
                pre_minitest_probs = numpy.sqrt(pre_minitest_probs)

        self.save_classified_patches()

        print "Confusion matrix:"
        print self.logRegressionLayer.conf_mat

        self.report_object_patch_statistics()

        fin_test_score = numpy.mean(test_losses)

        print("In the end final test score on whole image is %f\n" %
              (fin_test_score * 100))

        self.data_dict['test_scores'].append(test_losses)
        self.data_dict['test_probs'].append(pre_minitest_probs)

        return fin_test_score, pre_minitest_probs
Example #7
File: patch_mlp.py  Project: caglar/prmlp
    def train(self,
            data=None,
            presences=None,
            **kwargs):
        """
        Pretrain the MLP on the patches of images.
        """

        learning_rate = kwargs["learning_rate"]
        L1_reg = kwargs["L1_reg"]
        L2_reg = kwargs["L2_reg"]
        n_epochs = kwargs["nepochs"]
        cost_type = kwargs["cost_type"]
        save_exp_data = kwargs["save_exp_data"]
        batch_size = kwargs["batch_size"]
        normalize_weights = kwargs["normalize_weights"]

        presences = numpy.asarray(presences.tolist(), dtype="uint8")
        self.learning_rate = learning_rate

        # Assign the state of MLP:
        self.state = "train"

        if data is None or presences is None:
            raise Exception("Dataset or presences for pretraining can't be None.")

        if data.shape[0] != presences.shape[0]:
            raise Exception("Dataset and presences shape mismatch.")

        train_set_patches = shared_dataset(data, name="train_set_x")
        train_set_pre = shared_dataset(presences, name="train_set_pre")
        train_set_pre = T.cast(train_set_pre, "int32")

        # compute number of minibatches for training, validation and testing
        n_train_batches = int(math.ceil(data.shape[0] / float(batch_size)))
        if self.output == 1 or self.output == 2:
            pre_train_probs =\
            numpy.zeros((data.shape[0], self.n_out))
        else:
            pre_train_probs =\
            numpy.zeros((data.shape[0], self.n_out * self.no_of_patches))

        ######################
        # Pretrain the MODEL #
        ######################
        print '... pretraining the model'

        # allocate symbolic variables for the data
        index = T.lscalar('index')    # index to a [mini]batch
        y = T.ivector('y')  # the labels are presented as 1D vector of presences
        pindex = T.lscalar('pindex')

        #construct the MLP class
        # the cost we minimize during training is the negative log likelihood of
        # the model plus the regularization terms (L1 and L2); cost is expressed
        # here symbolically.
        cost = self.get_cost_function(cost_type, y, L1_reg, L2_reg)
        p_y_given_x = self.class_memberships

        updates = self.sgd_updates(cost, learning_rate)

        # compiling a Theano function `train_model` that returns the cost, but
        # in the same time updates the parameter of the model based on the rules
        # defined in `updates`
        train_model = theano.function(inputs=[index, pindex], outputs=[cost, p_y_given_x],
                updates=updates,
                givens={
                    self.input: train_set_patches[index * batch_size:(index + 1) * batch_size, pindex],
                    y: train_set_pre[index * batch_size:(index + 1) * batch_size, pindex]
                    }
                )

        epoch = 0
        costs = []
        Ws = []

        while (epoch < n_epochs):
            epoch_costs = []
            if normalize_weights:
                if epoch != 0:
                    self.normalize_weights()
            if not self.quiet:
                print "Training epoch %d has started." % (epoch)

            for minibatch_index in xrange(n_train_batches):
                minibatch_costs = []
                for pidx in xrange(self.no_of_patches):
                    minibatch_avg_cost, membership_probs = train_model(minibatch_index, pidx)
                    minibatch_costs.append(float(minibatch_avg_cost.tolist()))
                    if self.output == 1:
                        pre_train_probs[minibatch_index * batch_size:\
                        (minibatch_index + 1) * batch_size] += membership_probs
                    if self.output == 2:
                        pre_train_probs[minibatch_index * batch_size:\
                        (minibatch_index + 1) * batch_size] *= 10 * membership_probs
                    else:
                        pre_train_probs[minibatch_index * batch_size: (minibatch_index + 1) * batch_size, pidx * self.n_out: (pidx + 1) * self.n_out] = membership_probs
                if self.output == 2:
                    pre_train_probs = numpy.sqrt(pre_train_probs)

                Ws.append(self.params[2])
                epoch_costs.append(minibatch_costs)

            costs.append(epoch_costs)
            if not self.quiet:
                print "Normalizing the weights"
            epoch += 1

        self.data_dict['costs'].append([costs])
        self.data_dict['train_probs'].append(pre_train_probs)
        return costs, pre_train_probs
Example #8
File: optimization.py  Project: cfarbs/R3N
def mini_batch_sgd_with_annealing(motif, train_data, labels, xTrain_data, xTrain_targets,
                                  learning_rate, L1_reg, L2_reg, epochs,
                                  batch_size,
                                  hidden_dim, model_type, model_file=None,
                                  trained_model_dir=None, verbose=True, extra_args=None):
    # Preamble #
    # determine dimensionality of data and number of classes
    n_train_samples, data_dim = train_data.shape
    n_classes = len(set(labels))

    # compute number of mini-batches for training, validation and testing
    train_set_x, train_set_y = shared_dataset(train_data, labels, True)
    xtrain_set_x, xtrain_set_y = shared_dataset(xTrain_data, xTrain_targets, True)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_xtrain_batches = xtrain_set_x.get_value(borrow=True).shape[0] / batch_size

    batch_index = T.lscalar()

    # containers to hold mini-batches
    x = T.matrix('x')
    y = T.ivector('y')

    net = get_network(x=x, in_dim=data_dim, n_classes=n_classes, hidden_dim=hidden_dim, model_type=model_type,
                      extra_args=extra_args)

    if net is False:
        return False

    # cost function
    cost = (net.negative_log_likelihood(labels=y) + L1_reg * net.L1 + (L2_reg / n_train_samples) * net.L2_sq)
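    # i.e. cost = negative log-likelihood + L1_reg * L1 penalty
    #             + (L2_reg / n_train_samples) * squared-L2 penalty,
    # with the weight-decay term scaled by the training-set size.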

    xtrain_fcn = theano.function(inputs=[batch_index],
                                 outputs=net.errors(y),
                                 givens={
                                     x: xtrain_set_x[batch_index * batch_size: (batch_index + 1) * batch_size],
                                     y: xtrain_set_y[batch_index * batch_size: (batch_index + 1) * batch_size]
                                 })

    # gradients
    nambla_params = [T.grad(cost, param) for param in net.params]

    # update tuple
    dynamic_learning_rate = T.as_tensor_variable(learning_rate)

    # dynamic_learning_rate = learning_rate
    updates = [(param, param - dynamic_learning_rate * nambla_param)
               for param, nambla_param in zip(net.params, nambla_params)]

    # main function? could make this an attribute and reduce redundant code
    train_fcn = theano.function(inputs=[batch_index],
                                outputs=cost,
                                updates=updates,
                                givens={
                                    x: train_set_x[batch_index * batch_size: (batch_index + 1) * batch_size],
                                    y: train_set_y[batch_index * batch_size: (batch_index + 1) * batch_size]
                                })
    train_error_fcn = theano.function(inputs=[batch_index],
                                      outputs=net.errors(y),
                                      givens={
                                          x: train_set_x[batch_index * batch_size: (batch_index + 1) * batch_size],
                                          y: train_set_y[batch_index * batch_size: (batch_index + 1) * batch_size]
                                      })

    if model_file is not None:
        net.load_from_file(file_path=model_file, careful=True)

    # do the actual training
    batch_costs = [np.inf]
    add_to_batch_costs = batch_costs.append
    xtrain_accuracies = []
    add_to_xtrain_acc = xtrain_accuracies.append
    train_accuracies = []
    add_to_train_acc = train_accuracies.append
    xtrain_costs_bin = []
    prev_xtrain_cost = 1e-10

    best_xtrain_accuracy = -np.inf
    best_model = ''
    check_frequency = int(epochs / 10)

    for epoch in xrange(0, epochs):
        # evaluation of training progress and summary stat collection
        if epoch % check_frequency == 0:
            # get the accuracy on the cross-train data
            xtrain_errors = [xtrain_fcn(_) for _ in xrange(n_xtrain_batches)]
            avg_xtrain_errors = np.mean(xtrain_errors)
            avg_xtrain_accuracy = 100 * (1 - avg_xtrain_errors)
            # then the training set
            train_errors = [train_error_fcn(_) for _ in xrange(n_train_batches)]
            avg_training_errors = np.mean(train_errors)
            avg_train_accuracy = 100 * (1 - avg_training_errors)
            # collect for tracking progress
            add_to_xtrain_acc(avg_xtrain_accuracy)
            add_to_train_acc(avg_train_accuracy)
            xtrain_costs_bin += xtrain_errors

            if verbose:
                print("{0}: epoch {1}, batch cost {2}, train accuracy {3}, cross-train accuracy {4}"
                      .format(motif, epoch, batch_costs[-1], avg_train_accuracy, avg_xtrain_accuracy), file=sys.stderr)

            # if we're getting better, save the model, the 'oldest' model should be the one with the highest
            # cross-train accuracy
            if avg_xtrain_accuracy >= best_xtrain_accuracy and trained_model_dir is not None:
                if not os.path.exists(trained_model_dir):
                    os.makedirs(trained_model_dir)
                # update the best accuracy and best model
                best_xtrain_accuracy = avg_xtrain_accuracy
                best_model = "{0}model{1}.pkl".format(trained_model_dir, epoch)
                net.write(best_model)

        for i in xrange(n_train_batches):
            batch_avg_cost = train_fcn(i)
            if i % (n_train_batches / 10) == 0:
                add_to_batch_costs(float(batch_avg_cost))

        # annealing protocol
        mean_xtrain_cost = np.mean([xtrain_fcn(_) for _ in xrange(n_xtrain_batches)])
        if mean_xtrain_cost / prev_xtrain_cost < 1.0:
            dynamic_learning_rate *= 0.9

        if mean_xtrain_cost > prev_xtrain_cost:
            dynamic_learning_rate *= 1.05
        prev_xtrain_cost = mean_xtrain_cost
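        # The intent is a multiplicative schedule: shrink the rate by 10% when the
        # cross-train cost fell relative to the previous epoch, raise it by 5% when
        # it grew. Note that train_fcn was compiled with the original symbolic
        # dynamic_learning_rate expression, so rebinding the Python name above does
        # not feed back into the already-compiled updates; a theano.shared learning
        # rate updated with set_value would be one way to make the annealing take
        # effect.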

    # pickle the summary stats for the training
    summary = {
        "batch_costs": batch_costs,
        "xtrain_accuracies": xtrain_accuracies,
        "train_accuracies": train_accuracies,
        "xtrain_errors": xtrain_costs_bin,
        "best_model": best_model
    }
    if trained_model_dir is not None:
        with open("{}summary_stats.pkl".format(trained_model_dir), 'w') as f:
            cPickle.dump(summary, f)

    return net, summary
Example #9
def test_mlp_parity(learning_rate=0.01,
                    L1_reg=0.00,
                    L2_reg=0.0001,
                    n_epochs=100,
                    batch_size=64,
                    n_hidden=500,
                    n_hiddenLayers=1,
                    verbose=False):

    reader = csv.reader(open("joint_knee.csv", "rb"), delimiter=',')

    x = list(reader)
    #print x
    result = numpy.array(x)

    #print result.shape

    def score_to_numeric(x, a):
        if (x == 'Hospice - Home'):
            return 11
        if (x == 'Psychiatric Hospital or Unit of Hosp'):
            return 10
        if (x == 'Hospice - Medical Facility'):
            return 9
        if (x == 'Expired'):
            return 8
        if (x == 'Facility w/ Custodial/Supportive Care'):
            return 7
        if (x.lower() == 'left against medical advice'):
            return 6
        if (x.lower() == 'short-term hospital'):
            return 5
        if (x.lower() == 'multi-racial' or x.lower() == 'home or self care'):
            return 4
        if (x.lower() == 'other race' or x.lower() == 'emergency'
                or x.lower() == 'skilled nursing home'
                or x.lower() == 'not available'):
            return 3
        if (x.lower() == 'm' or x.lower() == 'black/african american'
                or x.lower() == 'urgent'
                or x.lower() == 'inpatient rehabilitation facility'):
            return 2
        if (x.lower() == 'f' or x.lower() == 'white' or x.lower() == 'elective'
                or x.lower() == 'home w/ home health services'):
            return 1
        if (a == 1):
            return int(x[:2])
        if (a == 2):
            return float(x[1:])
        else:
            return float(x)

    rownum = 0
    for row in result:
        # Save header row.
        if rownum == 0:
            rownum += 1
            header = row
            for i in range(0, len(header)):
                if header[i].lower() == 'gender':
                    gender = i
                if header[i].lower() == 'race':
                    race = i
                if header[i].lower() == 'type of admission':
                    admi = i
                if header[i].lower() == 'patient disposition':
                    disp = i
                if header[i].lower() == 'age group':
                    age = i
                if header[i].lower() == 'total charges':
                    price = i
        else:
            row[gender] = score_to_numeric(row[gender], 0)
            row[race] = score_to_numeric(row[race], 0)
            row[admi] = score_to_numeric(row[admi], 0)
            row[disp] = score_to_numeric(row[disp], 0)
            row[age] = score_to_numeric(row[age], 1)
            row[price] = score_to_numeric(row[price], 2)
            for i in range(0, len(row)):
                row[i] = float(row[i])
                #y = row[i].astype(numpy.float)
                #row[i] = y
                #print type(row[i])

    #print type(result)
    #result = numpy.array(result).astype('float')
    #print result[1:(len(result)),1:]
    res = result[1:(len(result)), 1:].astype(numpy.float)
    for i in range(len(res)):
        for j in range(len(res[0])):
            if (j == 9):
                res[i, j] = int(round(res[i, j] / 10000))
            else:
                res[i, j] = int(round(res[i, j]))
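    # Column 9 (presumably the total-charges field) is bucketed into $10,000-wide
    # bins so it can serve as a discrete class label; the remaining columns are
    # simply rounded to integers.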

    myset = set(res[:, 9])
    nout = len(myset)

    y = res[:, 9]
    #print y
    x = res[:, 0:9]

    iris = load_iris()
    clf = ExtraTreesClassifier()
    clf = clf.fit(x, y)
    model = SelectFromModel(clf, prefit=True)
    X_new = model.transform(x)
    data = np.c_[X_new, y]
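    # ExtraTreesClassifier feature importances drive SelectFromModel, which keeps
    # only the informative columns; X_new is the reduced feature matrix and data
    # re-attaches the labels as its last column.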

    totallen = len(data)
    numpy.random.shuffle(data)
    training, validation, testing = data[:totallen / 2, :], data[totallen / 2:(
        3 * totallen / 4), :], data[(3 * totallen / 4):, :]
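    # 50/25/25 split after shuffling: first half for training, next quarter for
    # validation, final quarter for testing.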

    l = len(data[0]) - 1

    train_set = [training[:, 0:l], training[:, l]]
    valid_set = [validation[:, 0:l], validation[:, l]]
    test_set = [testing[:, 0:l], testing[:, l]]

    #print train_set
    #print valid_set
    #print test_set

    # Convert raw dataset to Theano shared variables.
    train_set_x, train_set_y = shared_dataset(train_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    test_set_x, test_set_y = shared_dataset(test_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = myMLP(rng=rng,
                       input=x,
                       n_in=l,
                       n_hidden=n_hidden,
                       n_out=len(myset),
                       n_hiddenLayers=n_hiddenLayers)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model, n_train_batches,
             n_valid_batches, n_test_batches, n_epochs, verbose)

    y_p_train = theano.function(inputs=[],
                                outputs=[classifier.logRegressionLayer.y_pred],
                                givens={x: train_set_x})

    y_predict = theano.function(inputs=[],
                                outputs=[classifier.logRegressionLayer.y_pred],
                                givens={x: test_set_x})
    y_pred1 = y_p_train()
    y_pred2 = y_predict()

    return y_pred1, y_pred2
Example #10
batch_size = 5000
learning_rate = 3e-7
weight_decay = 2

# prepare the data
print ".preparing data"
dataset_paths = [
        utils.complement_path('/share/blur_images/all_in_one/train_o_set.npy'),
        utils.complement_path('/share/blur_images/all_in_one/train_b_set.npy'),
        utils.complement_path('/share/blur_images/all_in_one/valid_o_set.npy'),
        utils.complement_path('/share/blur_images/all_in_one/valid_b_set.npy'),
        utils.complement_path('/share/blur_images/all_in_one/test_o_set.npy'),
        utils.complement_path('/share/blur_images/all_in_one/test_b_set.npy')]

datasets = [
        utils.shared_dataset(np.load(dataset_path)) for dataset_path in dataset_paths]

# build the network
print ".building network"

normal  = T.fmatrix('normal')
corrupt = T.fmatrix('corrupt')

index = T.lscalar('index')

corrupt_input = corrupt.reshape((batch_size, 1, patch_shape[0], patch_shape[1]))
normal_input = normal.reshape((batch_size, 1, patch_shape[0], patch_shape[1]))

# patch extraction and representation, output shape=(33-9+1, 33-9+1)=(25, 25)
layer0_conv = ConvLayer(
        input = corrupt_input,
Example #11
    np.save('/home/ubuntu/temp_data/y_valid_' + str(j), y_valid)
    np.save('/home/ubuntu/temp_data/x_test_' + str(j),
            normalized_data[test_index])
    np.save('/home/ubuntu/temp_data/y_test_' + str(j), labels[test_index])

    j = j + 1

del x_train, x_train_sm, x_valid, y_train, y_train_sm, y_valid, train_valid_data, train_valid_labels

del normalized_data

for j in range(k):
    print('--- iteration no. %d ---' % (j + 1))

    x_train_sm, y_train_sm = shared_dataset(
        np.load('/home/ubuntu/temp_data/x_train_sm_' + str(j) + '.npy'),
        np.load('/home/ubuntu/temp_data/y_train_sm_' + str(j) + '.npy'))

    ######################
    # BUILDING THE MODEL #
    ######################
    print('building SDA...')
    numpy_rng = np.random.RandomState(np.random.randint(0, 10000))
    sda = SdA(numpy_rng=numpy_rng,
              n_ins=visible_units,
              hidden_layers_sizes=hidden_layers_sizes,
              n_outs=2)

    n_train_batches = x_train_sm.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
Example #12
File: postmlp.py  Project: caglar/prmlp
    def train(self,
             data=None,
             labels=None,
             **kwargs):

        learning_rate = kwargs["learning_rate"]
        L1_reg = kwargs["L1_reg"]
        L2_reg = kwargs["L2_reg"]
        n_epochs = kwargs["nepochs"]
        cost_type = kwargs["cost_type"]
        save_exp_data = kwargs["save_exp_data"]
        batch_size = kwargs["batch_size"]
        normalize_weights = kwargs["normalize_weights"]
        enable_dropout = kwargs["enable_dropout"]

        if data is None:
            raise Exception("Post-training can't start without pretraining class membership probabilities.")

        if labels is None:
            raise Exception("Post-training can not start without posttraining class labels.")

        self.state = "train"

        self.learning_rate = learning_rate

        train_set_x = shared_dataset(data, name="training_set_x")
        train_set_y = shared_dataset(labels, name="labels")
        train_set_y = T.cast(train_set_y, "int32")

        # compute number of minibatches for training
        n_examples = data.shape[0]
        n_train_batches = int(math.ceil(n_examples / float(batch_size)))

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print '...postraining the model'
        # allocate symbolic variables for the data
        index = T.lscalar('index')    # index to a [mini]batch
        y = T.ivector('y')  # the labels are presented as 1D vector of int32

        mode = "FAST_RUN"
        #import pudb; pudb.set_trace()
        if DEBUGGING:
            index.tag.test_value = 0
            y.tag.test_value = numpy.ones(n_examples)
            mode = "DEBUG_MODE"

        # the cost we minimize during training is the negative log likelihood of
        # the model plus the regularization terms (L1 and L2); cost is expressed
        # here symbolically.
        cost = self.get_cost_function(cost_type, y, L1_reg, L2_reg)
        updates = self.sgd_updates(cost, learning_rate)

        # compiling a Theano function `train_model` that returns the cost, but
        # in the same time updates the parameter of the model based on the rules
        # defined in `updates`
        # p_y_given_x = self.class_memberships
        train_model = theano.function(inputs=[index],
            outputs=cost,
            updates = updates,
            givens = {
                self.input: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size]
            },
            mode=mode)

        if DEBUGGING:
            theano.printing.debugprint(train_model)

        epoch = 0
        costs = []
        Ws = []

        while (epoch < n_epochs):
            print "In da epoch %d" % (epoch)
            for minibatch_index in xrange(n_train_batches):
                print "Postraining in Minibatch %i " % (minibatch_index)
                minibatch_avg_cost = train_model(minibatch_index)
                if enable_dropout:
                    self.dropout()

                if normalize_weights:
                    self.normalize_weights()

                costs.append(float(minibatch_avg_cost))
                Ws.append(self.params[2])
            epoch +=1

        if save_exp_data:
            self.data_dict['Ws'].append(Ws)
            self.data_dict['costs'].append([costs])
            self.save_data()
        return costs
Example #13
File: postmlp.py  Project: caglar/prmlp
    def test(self,
             data=None,
             labels=None,
             **kwargs):

        save_exp_data = kwargs["save_exp_data"]
        batch_size = kwargs["batch_size"]

        if data is None:
            raise Exception("Post-training can't start without pretraining class membership probabilities.")

        if labels is None:
            raise Exception("Post-training can not start without posttraining class-membership probabilities.")

        test_set_x = shared_dataset(data)
        test_set_y = shared_dataset(labels)
        test_set_y = T.cast(test_set_y, "int32")

        self.state = "test"

        # compute number of minibatches for training, validation and testing
        n_examples = data.shape[0]
        n_test_batches = int(math.ceil(n_examples / float(batch_size)))

        print '...post-testing the model'

        # allocate symbolic variables for the data
        index = T.lscalar()    # index to a [mini]batch

        y = T.ivector('y')  # the labels are presented as 1D vector of
                            # [int] labels

        mode = "FAST_RUN"
        if DEBUGGING:
            theano.config.compute_test_value = 'raise'
            index.tag.test_value = 0
            y.tag.test_value = numpy.ones(n_examples)
            mode = "DEBUG_MODE"

        # the cost we minimize during training is the negative log likelihood of
        # the model plus the regularization terms (L1 and L2); cost is expressed
        # here symbolically

        # compiling a Theano function `test_model` that computes the errors made
        # by the model on a minibatch (no parameter updates are applied here)

        test_model = theano.function(inputs=[index],
            outputs=self.errors(y),
            givens={
                self.input: test_set_x[index * batch_size:(index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]},
            mode=mode)

        ###############
        # TEST MODEL  #
        ###############

        test_losses = []

        for minibatch_index in xrange(n_test_batches):
            test_losses.append(float(test_model(minibatch_index)))
            test_score = numpy.mean(test_losses)
            print("Minibatch %i, mean test error %f" % (minibatch_index, test_score * 100))

        if save_exp_data:
            self.data_dict['test_scores'].append(test_losses)
            self.save_data()

        return test_score, test_losses
Example #14
    # build the mask matrix for missing values, load it into theano shared variable
    # build masks where 0 values correspond to nan values
    temp = np.isnan(train_set_y)
    train_MASK = np.ones(temp.shape)
    train_MASK[temp] = 0
    # still have to replace nan with something to avoid propagation in theano
    train_set_y[temp] = -1000
    temp = np.isnan(valid_set_y)
    val_MASK = np.ones(temp.shape)
    val_MASK[temp] = 0
    # still have to replace nan with something to avoid propagation in theano
    valid_set_y[temp] = -1000

    # load into theano shared variable
    print 'load data to gpu \n'
    train_set_x, train_set_y = shared_dataset(train_set_x, train_set_y)
    valid_set_x, valid_set_y = shared_dataset(valid_set_x, valid_set_y)
    val_MASK, train_MASK = shared_dataset(val_MASK, train_MASK)

    X = T.ftensor4('X')
    y = T.matrix('y')

    batch_size = 32
    l2 = .0002
    learn_rate = 1e-3

    #####################################################
    # # Continue a previous run
    # with open("results_backup.p", "rb") as f:
    #     best_network_params, best_val_loss_, best_epoch_,train_loss_history_, val_loss_history_, network = pickle.load(f)
    # # extract input var
Example #15
def test_mlp_parity(n_bit):
    #f=open('./problem_b/shallow_mlp_8bit.txt','w')
    #f=open('./problem_b/shallow_mlp_12bit.txt','w')
    f = open('./problem_b/deep_mlp_8bit.txt', 'w')
    #f=open('./problem_b/deep_mlp_12bit.txt','w')
    batch_size = 24
    #n_hidden=24
    n_hidden = (24, 24, 24, 24)
    learning_rate = 0.08
    L1_reg = 0.0
    L2_reg = 0.0
    n_epochs = 300
    n_hiddenLayers = 4
    # generate datasets
    train_set = gen_parity_pair(n_bit, 2000)
    valid_set = gen_parity_pair(n_bit, 500)
    test_set = gen_parity_pair(n_bit, 100)
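    # `gen_parity_pair` is defined elsewhere in the assignment code. A minimal
    # sketch of what it presumably does (a hypothetical stand-in, reusing the
    # module-level `np` import, not the actual helper) is:
    def gen_parity_pair_sketch(n_bits, n_examples):
        # random n-bit vectors and their parity (sum of bits mod 2) as the label
        X = np.random.randint(0, 2, size=(n_examples, n_bits))
        y = X.sum(axis=1) % 2
        return X, y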

    # Convert raw dataset to Theano shared variables.
    train_set_x, train_set_y = shared_dataset(train_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    test_set_x, test_set_y = shared_dataset(test_set)

    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    #training_enabled = T.iscalar('training_enabled')
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')
    #print('... building the model', file=f)

    rng = np.random.RandomState(23455)

    layers_input = x.reshape((batch_size, n_bit))
    layers = myMLP(rng,
                   input=layers_input,
                   n_in=n_bit,
                   n_hidden=n_hidden,
                   n_out=2,
                   n_hiddenLayers=n_hiddenLayers)

    test_model = theano.function(
        [index],
        layers.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            #training_enabled: numpy.cast['int32'](0)
        })

    validate_model = theano.function(
        [index],
        layers.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            #training_enabled: numpy.cast['int32'](0)
        })

    cost = layers.negative_log_likelihood(
        y) + layers.L1 * L1_reg + layers.L2_sqr * L2_reg
    params = layers.params
    grads = [T.grad(cost, param) for param in params]

    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            #training_enabled: numpy.cast['int32'](1)
        })

    ###############
    # TRAIN MODEL #
    ###############

    print('... training')
    #print('... training',file=f)
    train_nn(train_model=train_model,
             validate_model=validate_model,
             test_model=test_model,
             n_train_batches=n_train_batches,
             n_valid_batches=n_valid_batches,
             n_test_batches=n_test_batches,
             n_epochs=n_epochs,
             fil=f)
    f.close()
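
# Hedged sketch (gen_parity_pair itself is not shown in this snippet): it presumably
# draws random n_bit binary vectors and labels each with its parity. A plausible
# numpy-only stand-in could look like the following; the real helper may differ.
def gen_parity_pair_sketch(n_bit, num):
    import numpy as np
    X = np.random.randint(0, 2, size=(num, n_bit)).astype('float32')
    y = (X.sum(axis=1) % 2).astype('int32')   # parity label: sum of bits mod 2
    return X, y
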
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100, batch_size=128, n_hidden=500, n_hiddenLayers=3, verbose=False, smaller_set=True, timegap=0.5):
    """
    Wrapper function for training and testing MLP

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layers, and its length should equal to
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type smaller_set: boolean
    :param smaller_set: to use the smaller dataset or not to.

    """
    train_set, valid_set, test_set = load_data(theano_shared=False)

    # Convert raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    print test_set_y.eval().shape
    
    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size


    print 'n_train_batches : ',n_train_batches
    print 'n_valid_batches : ',n_valid_batches
    print 'n_test_batches : ',n_test_batches

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    # TODO: construct a neural network, either MLP or CNN.
    # input, n_in, n_hidden, n_out, n_hiddenLayers
    classifier = myMLP(rng, input=x, n_in=2304, n_hidden=n_hidden, n_out=7, n_hiddenLayers=n_hiddenLayers, parameters=None)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)


    print ('MODEL TRAINED..')
    
    ##########
    # SAVE the MODEL
    ##########

    import os
    modelFolderName = 'mlp_models'
    if not os.path.exists(modelFolderName):
        os.makedirs(modelFolderName)
    save_model(classifier,n_hidden,n_hiddenLayers,modelFolderName + '/'+'mlp_classifier_nhidden_%s_hiddenlayers_%s_batchSize_%s_epochs_%s'%(n_hidden,n_hiddenLayers,batch_size,n_epochs))

    print 'Model saved.'

# modelFolderName = 'mlp_models'
# modelName = 'mlp_classifier_nhidden_500_hiddenlayers_3_batchSize_20_epochs_2_json.save'
# test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=2,batch_size=20, n_hidden=500, n_hiddenLayers=3,verbose=True, smaller_set=False)
# predict_from_trained_model(modelFolderName +'/'+modelName)
Example #17
        # if not os.path.exists(save_path):
        #     os.makedirs(save_path); print 'create dir',save_path
        # save_the_env(dir_to_save='../cifar10', path=save_path)
        
    if nndF:
        X = T.matrix('X')
        logistic_reg = unpickle(nnd_path+'/best_model.pkl')
        get_lr_pred = theano.function([X], logistic_reg.forward(X))

    import cPickle, gzip
    f = gzip.open(datapath, 'rb')
    train_set_np, valid_set_np, test_set_np = cPickle.load(f)
    f.close()

    N ,D = train_set_np[0].shape; Nv,D = valid_set_np[0].shape; Nt,D = test_set_np[0].shape
    train_set = shared_dataset(train_set_np)
    valid_set = shared_dataset(valid_set_np)
    test_set  = shared_dataset(test_set_np )

    print 'batch sz %d, epsilon gen %g, epsilon dis %g, hnum_z %d, num_conv_hid %g, num_epoch %d, lam %g' % \
                                    (batch_sz, epsilon_gen, epsilon_dis, num_z, conv_num_hid, num_epoch, lam)

    book_keeping = []

    num_hids     = [num_hid1]
    train_params = [num_epoch, epoch_start, contF]
    opt_params   = [batch_sz, epsilon_gen, epsilon_dis, momentum, num_epoch, N, Nv, Nt, lam]    
    ganI_params  = [batch_sz, D, num_hids, rng, num_z, nkerns, ckern, num_channel]
    conv_params  = [conv_num_hid, D, num_class, batch_sz, num_channel]
    min_vl_cost = main(train_set, valid_set, test_set, opt_params, ganI_params, train_params, conv_params)
    book_keeping.append(min_vl_cost)
Example #18
File: patch_mlp.py  Project: caglar/prmlp
    def test(self,
            dataset=None,
            presences=None,
            **kwargs):
        """
        Test the mlp on the given dataset with the presences.
        """
        save_costs_to_file = kwargs["save_exp_data"]
        batch_size = kwargs["batch_size"]
        save_patch_examples = False

        if "save_classified_patches" in kwargs:
            save_patch_examples = kwargs["save_classified_patches"]

        if dataset is None or presences is None:
            raise Exception("Dataset or presences for pretraining can't be None.")

        self.state = "test"
        test_set_patches = shared_dataset(dataset, name="test_set_x")
        presences = numpy.asarray(presences.tolist(), dtype="int32")
        test_set_pre = shared_dataset(presences, name="test_set_pre")
        test_set_pre = T.cast(test_set_pre, 'int32')

        # compute number of minibatches for training, validation and testing
        n_test_batches = int(math.ceil(dataset.shape[0] / float(batch_size)))

        if self.output == 1 or self.output == 2:
            pre_minitest_probs = numpy.zeros((dataset.shape[0], self.n_out))
        else:
            pre_minitest_probs = numpy.zeros((dataset.shape[0], self.n_out * self.no_of_patches))

        ######################
        # Testing the MODEL. #
        ######################
        print '... pre-testing the model'

        # allocate symbolic variables for the data
        index = T.lscalar()    # index to a [mini]batch
        y = T.ivector('y')  # the labels are presented as 1D vector of presences
        pindex = T.lscalar('pindex')

        p_y_given_x = self.class_memberships

        if save_patch_examples:
            test_model = theano.function(
                inputs=[index, pindex],
                outputs=[self.errors(y), p_y_given_x, self.raw_prediction_errors(y)],
                givens={
                    self.input: test_set_patches[index * batch_size: (index + 1) * batch_size, pindex],
                    y: test_set_pre[index * batch_size: (index + 1) * batch_size, pindex]
                }
            )
        else:
            test_model = theano.function(
                inputs=[index, pindex],
                outputs=[self.errors(y), p_y_given_x],
                givens={
                    self.input: test_set_patches[index * batch_size: (index + 1) * batch_size, pindex],
                    y: test_set_pre[index * batch_size: (index + 1) * batch_size, pindex]
                }
            )

        test_losses = []
        test_score = 0

        for minibatch_index in xrange(n_test_batches):
            for pidx in xrange(self.no_of_patches):
                if save_patch_examples:
                    test_loss, membership_probs, raw_errors = test_model(minibatch_index, pidx)
                    patches = dataset[minibatch_index * batch_size: (minibatch_index + 1) * batch_size, pidx]
                    self.record_classified_examples(patches, raw_errors)
                else:
                    test_loss, membership_probs = test_model(minibatch_index, pidx)

                test_losses.append(test_loss)
                test_score = numpy.mean(test_loss)
                pre_batch_vals = presences[minibatch_index * batch_size:\
                    (minibatch_index + 1) * batch_size, pidx]
                if self.output == 1:
                    pre_minitest_probs[minibatch_index * batch_size:\
                    (minibatch_index + 1) * batch_size] +=\
                    membership_probs
                elif self.output == 2:
                    pre_minitest_probs[minibatch_index * batch_size:\
                    (minibatch_index + 1) * batch_size] *=\
                    10 * membership_probs
                else:
                    pre_minitest_probs[minibatch_index * batch_size:\
                    (minibatch_index + 1) * batch_size, pidx * self.n_out:\
                    (pidx + 1) * self.n_out] = membership_probs

                self.logRegressionLayer.update_conf_mat(pre_batch_vals, membership_probs)

                if not self.quiet:
                    print("Minibatch %i and its test error %f percent on patch %i" % (minibatch_index, test_score * 100, pidx))
            if self.output == 2:
                pre_minitest_probs = numpy.sqrt(pre_minitest_probs)

        self.save_classified_patches()

        print "Confusion matrix:"
        print self.logRegressionLayer.conf_mat

        self.report_object_patch_statistics()

        fin_test_score = numpy.mean(test_losses)

        print("In the end final test score on whole image is %f\n" % (fin_test_score * 100))

        self.data_dict['test_scores'].append(test_losses)
        self.data_dict['test_probs'].append(pre_minitest_probs)

        return fin_test_score, pre_minitest_probs
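
# Hedged illustration (toy values, not from the original code) of the three ways the
# per-patch class probabilities are combined above: summed when self.output == 1,
# multiplied by 10*probs and square-rooted when self.output == 2, concatenated otherwise.
import numpy
patch_probs = [numpy.array([0.2, 0.8]), numpy.array([0.6, 0.4])]   # two patches, two classes
combined_sum = patch_probs[0] + patch_probs[1]                     # output == 1
combined_prod = numpy.sqrt(10 * patch_probs[0] * 10 * patch_probs[1])  # output == 2 (roughly)
combined_concat = numpy.concatenate(patch_probs)                   # default: one block per patch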
Example #19
def run(rng_seed,ltype, mtype,load_path, load_epoch, sample=False, nclass=10, whichclass=None, verbose=False, class_list=None, ckernr=None, cri_ckern=None):
    
    assert ckernr is not None
    #  ltype -> GAN LSGAN WGAN 
    #    JS      0.4+-asdf
    #    LS
    #    WA
    #    MMD 
    #    IS


    ### MODEL PARAMS
    # ltype       = sys.argv[3]
    # mtype       = 'js'
    # print 'ltype: ' + ltype
    # print 'mtype: ' + mtype
    mmdF        = False
    nndF        = False

    # CONV (DISC)
    conv_num_hid= 100
    num_channel = 3 #Fixed
    num_class   = 1 #Fixed
    D=64*64*3
    kern=int(ckernr.split('_')[0])

    ### OPT PARAMS
    batch_sz    = 100
    momentum    = 0.0 #Not Used
    lam         = 0.0
    
    epsilon_dis = 0.0002
    epsilon_gen = 0.0001
    
    # if mtype =='js' :
    #     epsilon_dis = 0.0002
    #     epsilon_gen = 0.0001
    #     K=5 #FIXED
    #     J=1
    # elif mtype == 'ls':
    #     epsilon_dis = 0.0002
    #     epsilon_gen = 0.0001
    #     K=5 #FIXED
    #     J=1
    # else:
    #     epsilon_dis = 0.0002
    #     epsilon_gen = 0.0001
    #     K=2 #FIXED
    #     J=1

    # ganI (GEN)
    filter_sz   = 4 #FIXED
    nkerns      = [1,8,4,2,1]
    ckern       = int(ckernr.split('_')[-1]) #20
    num_hid1    = nkerns[0]*ckern*filter_sz*filter_sz #Fixed
    num_z       = 100

    ### TRAIN PARAMS
    num_epoch   = 10
    epoch_start = 0 #Fixed
    contF       = True #Fixed
    
    num_hids     = [num_hid1]
    
    input_width = 64
    input_height = 64
    input_depth = 3
    
    ### SAVE PARAM
    model_param_save = 'num_hid%d.batch%d.eps_dis%g.eps_gen%g.num_z%d.num_epoch%g.lam%g.data.100_CONV_lsun'%(conv_num_hid,batch_sz, epsilon_dis, epsilon_gen, num_z, num_epoch, lam)

    
    # device=sys.argv[1]
    import os
    os.environ['RNG_SEED'] = str(rng_seed)
    os.environ['LOAD_PATH'] = load_path
    os.environ['LOAD_EPOCH'] = str(load_epoch)
    os.environ['LTYPE'] = ltype
    # os.environ['MTYPE'] = mtype
    try:
        a = os.environ['CRI_KERN']
    except KeyError:
        if cri_ckern is not None:
            os.environ['CRI_KERN'] = cri_ckern
        else:
            raise RuntimeError('cri_kern not provided')
    
    import theano 
    import theano.sandbox.rng_mrg as RNG_MRG
    rng = np.random.RandomState(int(os.environ['RNG_SEED']))
    MRG = RNG_MRG.MRG_RandomStreams(rng.randint(2 ** 30))
    
    from util_cifar10 import load_cifar10
    from utils import shared_dataset, unpickle
    
    
    import pwd; username = pwd.getpwuid(os.geteuid()).pw_name
    
    global nnd_path
    if username in ['hma02', 'mahe6562']:
        if username=='hma02':
            datapath = '/mnt/data/hma02/data/cifar10/cifar-10-batches-py/'
            save_path = '/mnt/data/hma02/gap/dcgan-cifar10/'
            nnd_path = '/mnt/data/hma02/gap/'
        else:
            datapath = '/scratch/g/gwtaylor/mahe6562/data/cifar10/cifar-10-batches-py/'
            save_path = '/scratch/g/gwtaylor/mahe6562/gap/dcgan-cifar10/'
            nnd_path = '//scratch/g/gwtaylor/mahe6562/gap/'
            
        import time; date = '%d-%d' % (time.gmtime()[1], time.gmtime()[2])
        import os; worker_id = os.getpid()
        save_path+= date+'-%d-%s/' % (worker_id,ltype)
        # if not os.path.exists(save_path):
        #     os.makedirs(save_path); print 'create dir',save_path
        #
        # save_the_env(dir_to_save='../mnist', path=save_path)
        
    global train_set_np,valid_set_np,test_set_np
    
    train_set_np, valid_set_np, test_set_np = load_cifar10(path=datapath, verbose=False)
    # 127.5 - 1. in order to rescale to -1 to 1.
    
    
    train_set_np[0] = train_set_np[0] / 255.0 #127.5 - 1.
    valid_set_np[0] = valid_set_np[0] / 255.0 #127.5 - 1.
    test_set_np[0]  = test_set_np[0]  / 255.0 #127.5 - 1.
    
    N ,D = train_set_np[0].shape; Nv,D = valid_set_np[0].shape; Nt,D = test_set_np[0].shape
    
    train_set = shared_dataset(train_set_np)
    valid_set = shared_dataset(valid_set_np)
    test_set  = shared_dataset(test_set_np )

    # print 'batch sz %d, epsilon gen %g, epsilon dis %g, hnum_z %d, num_conv_hid %g, num_epoch %di, lam %g' % \
#                                     (batch_sz, epsilon_gen, epsilon_dis, num_z, conv_num_hid, num_epoch, lam)

    book_keeping = []

    num_hids     = [num_hid1]
    train_params = [num_epoch, epoch_start, contF]
    opt_params   = [batch_sz, epsilon_gen, epsilon_dis, momentum, num_epoch, N, Nv, Nt, lam]    
    ganI_params  = [batch_sz, D, num_hids, rng, num_z, nkerns, ckern, num_channel]
    conv_params  = [conv_num_hid, D, num_class, batch_sz, num_channel, kern]
    
    if sample==True:
        samples = main(train_set, valid_set, test_set, opt_params, ganI_params, train_params, conv_params, sample)
        return 0,0,0,0
    else:
        te_score_ls, te_score_iw , mmd_te , is_sam = main(train_set, valid_set, test_set, opt_params, ganI_params, train_params, conv_params, sample)
    
        return te_score_ls, te_score_iw , mmd_te , is_sam
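
# Hedged usage note: a hypothetical invocation of run() (every path and hyperparameter
# below is a placeholder, not taken from the source); ckernr packs the discriminator
# and generator kernel counts as '<kern>_<ckern>'.
# te_ls, te_iw, mmd_te, is_sam = run(rng_seed=1, ltype='lsgan', mtype='ls',
#                                    load_path='/path/to/saved/gan', load_epoch=9,
#                                    ckernr='64_20', cri_ckern='64')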
Example #20
def train(learning_rate=0.1, n_epochs=10, kernel_shapes = [7,5],
                    nkerns=[15,15], batch_size=1000, batch_type = 'fast',
                    mynet = 'best', representation='raw', momentum=0, history=4):

    # TODO: implement history of boards
    rng = numpy.random.RandomState(42)

    trainP = 0.998
    validP = 0.001
    testP  = 0.001    
    
    print "... Reading cached values ..."
    (trainCumLengths,validCumLengths,testCumLengths,filenames) = pickle.load(open("results/lengths.cache",'r'))
    
    print "... Getting filenames ..."
    datasetKGS = "../../go-data"
    datasetPro = "../../pro-GoGod"
    # use both datasets, test and valid set are only Pro games
#    fn1 = readGame.getFilenames(datasetKGS,1,0,1)[0]
#    random.shuffle(fn1)    
#    fn2 = readGame.getFilenames(datasetPro,1,0,1)[0]
    # NOTE: last 5% of professional games never used!
#    fn2 = fn2[:int(len(fn2)*0.95)]
#    random.shuffle(fn2)
#    filenames = fn2 #fn1 + fn2
    n = len(filenames)
    print "... Learning set contains " + str(n) + " games"
    
    print "... Computing cumulative game lengths ..."
    trainNames = filenames[:int(trainP*n)]
    validNames = filenames[int(trainP*n):int(trainP*n+validP*n)]
    testNames  = filenames[int(trainP*n+validP*n):int(trainP*n+validP*n+testP*n)]
    
#    random.shuffle(trainNames)
    
#    trainCumLengths = readGame.getCumGameLengths(trainNames)
#    validCumLengths = readGame.getCumGameLengths(validNames)
#    testCumLengths = readGame.getCumGameLengths(testNames)
    
#    fw = open("results/lengths.cache","wb")
#    pickle.dump((trainCumLengths,validCumLengths,testCumLengths,filenames),fw)
#    fw.close()
    print "... Preprocessing initial batches ..."
    minn = batch_size / 80 +1
    temp = time.time()
    test_batch_x, test_batch_y = utils.shared_dataset(readGame.processSGFs(testNames[:minn],representation),batch_size=batch_size)
    train_batch_x, train_batch_y = utils.shared_dataset(readGame.processSGFs(trainNames[:minn],representation),batch_size=batch_size)
    valid_batch_x, valid_batch_y = utils.shared_dataset(readGame.processSGFs(validNames[:minn],representation),batch_size=batch_size)
    print "    average processing time per game: " + str((time.time()-temp)/18.0) + " seconds, per epoch: " + str(int((time.time()-temp)/18*n/60/60)) + " hours" 

    # compute number of minibatches for training, validation and testing
    n_train_batches = trainCumLengths[-1]
    n_valid_batches = validCumLengths[-1]
    n_test_batches =  testCumLengths[-1]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    iteration = T.lscalar()  # iteration number of a minibatch
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    gs = 19 # size of the go board
    ishape = (gs, gs)  # shape of the go board input

    fw = open("results/"+mynet+"_"+str(learning_rate)+"_"+str(nkerns[0])+".res","w")
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... Building the model ...'
   
    nc = 2 if representation=='raw' else 6  # 2 feature planes for 'raw' boards, 6 otherwise
    nc *= 1+history
    
    if mynet == "default":
        # default is 7x7, regular 3 kernels
        layer0_input = x.reshape((batch_size, nc, gs, gs))
        layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                image_shape=(batch_size, nc, gs, gs),
                filter_shape=(nkerns[0], nc, 7, 7), poolsize=(1, 1))
        layer2_input = layer0.output.flatten(2)
        layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[0] * 13 * 13,
                           n_out=500, activation=T.tanh)
        layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=361)
        cost = layer3.negative_log_likelihood(y)
 
#        prevGrads = [theano.shared(numpy.zeros((500,361),dtype=theano.config.floatX),borrow=True),
#                 theano.shared(numpy.zeros((361,),dtype=theano.config.floatX),borrow=True),
#                 theano.shared(numpy.zeros((nkerns[0] *13*13,500), dtype=theano.config.floatX),borrow=True),
#                 theano.shared(numpy.zeros((500,),dtype=theano.config.floatX),borrow=True),
#                 theano.shared(numpy.zeros((nkerns[0],nc,7,7),dtype=theano.config.floatX),borrow=True),
#                 theano.shared(numpy.zeros((nkerns[0],),dtype=theano.config.floatX),borrow=True),
#                ]
        params = layer3.params + layer2.params + layer0.params
    
   
    if mynet == "best":
        ks = kernel_shapes
        sp1= gs-ks[0]+1
        sp2= sp1-ks[1]+1
        layer0_input = x.reshape((batch_size, nc, gs, gs))
        layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                image_shape=(batch_size, nc, gs, gs),
                filter_shape=(nkerns[0], nc, ks[0], ks[0]), poolsize=(1, 1))
        layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                image_shape=(batch_size, nkerns[0], sp1, sp1),
                filter_shape=(nkerns[1], nkerns[0], ks[1], ks[1]), poolsize=(1, 1))
        layer3 = LogisticRegression(input=layer1.output.flatten(2), n_in=nkerns[1]*sp2*sp2, n_out=gs*gs)
        cost = layer3.negative_log_likelihood(y)
    
        prevGrads = [theano.shared(numpy.zeros((nkerns[1]*9*9,361),dtype=theano.config.floatX),borrow=True),
                 theano.shared(numpy.zeros((gs*gs,),dtype=theano.config.floatX),borrow=True),
                 theano.shared(numpy.zeros((nkerns[0],nkerns[1],ks[1],ks[1]), dtype=theano.config.floatX),borrow=True),
                 theano.shared(numpy.zeros((nkerns[1],),dtype=theano.config.floatX),borrow=True),
                 theano.shared(numpy.zeros((nkerns[0],nc,ks[0],ks[0]),dtype=theano.config.floatX),borrow=True),
                 theano.shared(numpy.zeros((nkerns[0],),dtype=theano.config.floatX),borrow=True),
                ]
        params = layer3.params + layer1.params + layer0.params

    if mynet == "padded":       # TODO: add zero padding test deeper architectures
        ks = kernel_shapes
        sp1= gs-ks[0]+1
        sp2= sp1-ks[1]+1
        layer0_input = x.reshape((batch_size, nc, gs, gs))
        layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                image_shape=(batch_size, nc, gs, gs),
                filter_shape=(nkerns[0], nc, ks[0], ks[0]), poolsize=(1, 1))
        layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                image_shape=(batch_size, nkerns[0], sp1, sp1),
                filter_shape=(nkerns[1], nkerns[0], ks[1], ks[1]), poolsize=(1, 1))
        layer3 = LogisticRegression(input=layer1.output.flatten(2), n_in=nkerns[1]*sp2*sp2, n_out=gs*gs)
        cost = layer3.negative_log_likelihood(y)
    
        params = layer3.params + layer1.params + layer0.params
        
    # create a function to compute the mistakes that are made by the model
    test_model = theano.function([], layer3.errors(y),
             givens={
                x: test_batch_x,
                y: T.cast(test_batch_y, 'int32')})

    validate_model = theano.function([], layer3.errors(y),
             givens={
                x: valid_batch_x,
                y: T.cast(valid_batch_y, 'int32')})

    predictions = theano.function([], layer3.get_predictions(),
            givens={
                x: valid_batch_x})
                
    conditional_dist = theano.function([], layer3.get_conditional_dist(),
            givens={
                x: valid_batch_x})

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i],grads[i]) pairs.
    updates = []
    #adjusted_rate = learning_rate - iteration*(learning_rate/(float(n_epochs) * n_train_batches))
    # NOTE: a Python conditional on the symbolic T.lt() is always truthy, so the decayed
    # branch would never be taken; T.switch builds the intended symbolic schedule instead.
    adjusted_rate = T.switch(T.lt(iteration, 3000*200), learning_rate, 0.1*learning_rate)
    
    for param_i, grad_i in zip(params, grads):#, prev_grad_i   , prevGrads):
        updates.append((param_i, param_i - adjusted_rate * grad_i))# - momentum * prev_grad_i))
    
    #for i,grad in enumerate(grads):
    #    updates.append((prevGrads[i], grad))
    
    train_model = theano.function([iteration], cost, updates=updates,
         givens={
            x: train_batch_x,
            y: T.cast(train_batch_y, 'int32')},on_unused_input='ignore')

    ###############
    # TRAIN MODEL #
    ###############
    print '... Training ...'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.999  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = 10000         # min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False
    stime = time.time()

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 1000 == 0:
                print 'training @ iter = ', iter
                pickle.dump((updates,cost,layer0,layer1,layer3,test_model,predictions,conditional_dist),open("results/"+str(batch_size)+representation+str(history)+".model","w"))
            if iter ==5:
                print 'estimated train time per epoch = '+ str((time.time() - stime) * n_train_batches/60.0/iter/60.0) + " hours"
            ax,ay = getBatch(trainNames, minibatch_index, trainCumLengths, batch_size,representation,batchType=batch_type,history=history)
            train_batch_x.set_value(ax)
            train_batch_y.set_value(ay)
            cost_ij = train_model(iter)

            if (iter + 1) % validation_frequency == 0 or iter==5:

                # compute zero-one loss on validation set
                validation_losses = []
                for i in xrange(n_valid_batches):
                    vx,vy = getBatch(validNames, i, validCumLengths, batch_size,representation,batchType='fast',history=history)
                    valid_batch_x.set_value(vx)
                    valid_batch_y.set_value(vy)
                    validation_losses.append(validate_model())
                this_validation_loss = numpy.mean(validation_losses)
        
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses=[]
                    for i in xrange(n_test_batches):
                        tx,ty = getBatch(testNames, i, testCumLengths, batch_size,representation,batchType='fast',history=history)
                        test_batch_x.set_value(tx)
                        test_batch_y.set_value(ty)
                        test_losses.append(test_model())
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

        fw.write("Epoch "+str(epoch) + ": " +str((1-this_validation_loss)*100.)+ "%\n")
        pickle.dump((updates,cost,layer0,layer1,layer3,test_model,predictions,conditional_dist),open("results/"+str(batch_size)+representation+str(history)+".model","w"))
        
            #if patience <= iter:
            #    done_looping = True
            #    break

    fw.close()
    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,'\
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
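
# Hedged sketch distilling the patience-based early stopping used in the training loop
# above into a standalone helper; the names and the `validate` callable are illustrative
# and not part of the original code.
def early_stopping_sketch(validate, n_train_batches, n_epochs,
                          patience=10000, patience_increase=2,
                          improvement_threshold=0.999, validation_frequency=10000):
    best_loss = float('inf')
    for epoch in range(1, n_epochs + 1):
        for minibatch_index in range(n_train_batches):
            it = (epoch - 1) * n_train_batches + minibatch_index
            if (it + 1) % validation_frequency == 0:
                loss = validate()
                if loss < best_loss * improvement_threshold:
                    # significant improvement: keep looking for at least this many iterations
                    patience = max(patience, it * patience_increase)
                if loss < best_loss:
                    best_loss = loss
            if patience <= it:
                return best_loss   # patience exhausted
    return best_loss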
Example #21
def computeAcc(pred, y):
    return 1 if numpy.argmax(pred) == y else 0
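
# Quick sanity check of computeAcc on toy values (illustrative only).
assert computeAcc(numpy.array([0.1, 0.7, 0.2]), 1) == 1   # argmax is index 1
assert computeAcc(numpy.array([0.1, 0.7, 0.2]), 2) == 0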


model = pickle.load(open("results/1raw0.model", "r"))
(updates, cost, layer0, layer1, layer3, test_model, predictions,
 conditional_dist) = model

(trainCumLengths, validCumLengths, testCumLengths,
 filenames) = pickle.load(open("results/lengths.cache", 'r'))
fn = filenames[:2000]
fncl = trainCumLengths[:2000]
batch_size = 1

valid_batch_x, valid_batch_y = utils.shared_dataset(
    readGame.processSGFs(filenames[:6], 'raw'))
test_batch_x, test_batch_y = utils.shared_dataset(
    readGame.processSGFs(filenames[:6], 'raw'))

c = 0

while (True):
    c += 1
    fr = open("c2py", "r")
    txt = fr.read()
    fr.close()
    if txt == '':
        print "EMPTY INPUT"
        raise IOError
    #print "INPUT= '"+ txt + "'"
    #print "txt = " + txt
Example #22
    def train(self, data=None, presences=None, **kwargs):
        """
        Pretrain the MLP on the patches of images.
        """

        learning_rate = kwargs["learning_rate"]
        L1_reg = kwargs["L1_reg"]
        L2_reg = kwargs["L2_reg"]
        n_epochs = kwargs["nepochs"]
        cost_type = kwargs["cost_type"]
        save_exp_data = kwargs["save_exp_data"]
        batch_size = kwargs["batch_size"]
        normalize_weights = kwargs["normalize_weights"]

        presences = numpy.asarray(presences.tolist(), dtype="uint8")
        self.learning_rate = learning_rate

        # Assign the state of MLP:
        self.state = "train"

        if data is None or presences is None:
            raise Exception(
                "Dataset or presences for pretraining can't be None.")

        if data.shape[0] != presences.shape[0]:
            raise Exception("Dataset and presences shape mismatch.")

        train_set_patches = shared_dataset(data, name="train_set_x")
        train_set_pre = shared_dataset(presences, name="train_set_pre")
        train_set_pre = T.cast(train_set_pre, "int32")

        # compute number of minibatches for training, validation and testing
        n_train_batches = int(math.ceil(data.shape[0] / float(batch_size)))
        if self.output == 1 or self.output == 2:
            pre_train_probs =\
            numpy.zeros((data.shape[0], self.n_out))
        else:
            pre_train_probs =\
            numpy.zeros((data.shape[0], self.n_out * self.no_of_patches))

        ######################
        # Pretrain the MODEL #
        ######################
        print '... pretraining the model'

        # allocate symbolic variables for the data
        index = T.lscalar('index')  # index to a [mini]batch
        y = T.ivector(
            'y')  # the labels are presented as 1D vector of presences
        pindex = T.lscalar('pindex')

        #construct the MLP class
        # the cost we minimize during training is the negative log likelihood of
        # the model plus the regularization terms (L1 and L2); cost is expressed
        # here symbolically.
        cost = self.get_cost_function(cost_type, y, L1_reg, L2_reg)
        p_y_given_x = self.class_memberships

        updates = self.sgd_updates(cost, learning_rate)

        # compiling a Theano function `train_model` that returns the cost, but
        # in the same time updates the parameter of the model based on the rules
        # defined in `updates`
        train_model = theano.function(
            inputs=[index, pindex],
            outputs=[cost, p_y_given_x],
            updates=updates,
            givens={
                self.input:
                train_set_patches[index * batch_size:(index + 1) * batch_size,
                                  pindex],
                y:
                train_set_pre[index * batch_size:(index + 1) * batch_size,
                              pindex]
            })

        epoch = 0
        costs = []
        Ws = []

        while (epoch < n_epochs):
            epoch_costs = []
            if normalize_weights:
                if epoch != 0:
                    self.normalize_weights()
            if not self.quiet:
                print "Training epoch %d has started." % (epoch)

            for minibatch_index in xrange(n_train_batches):
                minibatch_costs = []
                for pidx in xrange(self.no_of_patches):
                    minibatch_avg_cost, membership_probs = train_model(
                        minibatch_index, pidx)
                    minibatch_costs.append(float(minibatch_avg_cost.tolist()))
                    if self.output == 1:
                        pre_train_probs[minibatch_index * batch_size:\
                        (minibatch_index + 1) * batch_size] += membership_probs
                    elif self.output == 2:
                        pre_train_probs[minibatch_index * batch_size:\
                        (minibatch_index + 1) * batch_size] *= 10 * membership_probs
                    else:
                        pre_train_probs[minibatch_index *
                                        batch_size:(minibatch_index + 1) *
                                        batch_size,
                                        pidx * self.n_out:(pidx + 1) *
                                        self.n_out] = membership_probs
                if self.output == 2:
                    pre_train_probs = numpy.sqrt(pre_train_probs)

                Ws.append(self.params[2])
                epoch_costs.append(minibatch_costs)

            costs.append(epoch_costs)
            if not self.quiet:
                print "Normalizing the weights"
            epoch += 1

        self.data_dict['costs'].append([costs])
        self.data_dict['train_probs'].append(pre_train_probs)
        return costs, pre_train_probs
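
# Hedged usage sketch of the keyword interface read inside train() above; `prmlp`,
# `patch_data`, `patch_presences` and every value below are placeholders, not taken
# from the original project.
# costs, probs = prmlp.train(data=patch_data, presences=patch_presences,
#                            learning_rate=0.01, L1_reg=0.0, L2_reg=0.0001,
#                            nepochs=10, cost_type="crossentropy",
#                            save_exp_data=False, batch_size=100,
#                            normalize_weights=True)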
Example #23
def test_data_augmentation(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100, batch_size=128, n_hidden=500, n_hiddenLayers=3, verbose=False, steps=1):
    """
    Wrapper function for experiment of data augmentation

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layers, and its length should equal to
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type steps: int
    :param steps: number of pixels by which the training images are translated
    when synthesizing the augmented copies.

    """
    rng = numpy.random.RandomState(23455)

    # Load down-sampled dataset in raw format (numpy.darray, not Theano.shared)
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix), where each row
    # corresponds to an example. target is a numpy.ndarray of 1 dimension
    # (vector) that has the same length as the number of rows in the input.

    # Load the smaller dataset in raw Format, since we need to preprocess it
    train_set, valid_set, test_set = load_data(ds_rate=5, theano_shared=False)

    # Repeat the training set 5 times
    train_set[1] = numpy.tile(train_set[1], 5)

    # TODO: translate the dataset
    train_set_x_u = translate_image(train_set[0],'top',steps)
    train_set_x_d = translate_image(train_set[0],'bottom',steps)
    train_set_x_r = translate_image(train_set[0],'right',steps)
    train_set_x_l = translate_image(train_set[0],'left',steps)

    # Stack the original dataset and the synthesized datasets
    train_set[0] = numpy.vstack((train_set[0],
                       train_set_x_u,
                       train_set_x_d,
                       train_set_x_r,
                       train_set_x_l))

    # Convert raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32*32*3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
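
# Hedged sketch of what translate_image might do (the real helper is not shown here,
# and its channel layout may differ): shift each flattened 3x32x32 CIFAR image by
# `steps` pixels in the given direction, zero-filling the vacated border.
def translate_image_sketch(X, direction, steps):
    import numpy as np
    if steps == 0:
        return X.copy()
    imgs = X.reshape(-1, 3, 32, 32)
    out = np.zeros_like(imgs)
    if direction == 'top':
        out[:, :, :-steps, :] = imgs[:, :, steps:, :]
    elif direction == 'bottom':
        out[:, :, steps:, :] = imgs[:, :, :-steps, :]
    elif direction == 'left':
        out[:, :, :, :-steps] = imgs[:, :, :, steps:]
    elif direction == 'right':
        out[:, :, :, steps:] = imgs[:, :, :, :-steps]
    return out.reshape(X.shape[0], -1)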
Example #24
File: evaluate.py  Project: yxonic/DNN
def evaluate_lenet(learning_rate=0.1, n_epochs=200,
                   nkerns=[20, 50], batch_size=500):
    rng = np.random.RandomState(12345)

    print("Loading datasets...")
    train_set, valid_set, test_set = utils.load_MNIST()
    train_set_X, train_set_y = utils.shared_dataset(train_set)
    valid_set_X, valid_set_y = utils.shared_dataset(valid_set)
    test_set_X, test_set_y = utils.shared_dataset(test_set)

    # we cut data to batches so that we can efficiently load them to
    # GPU (if needed)
    n_train_batches = train_set_X.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_X.get_value(borrow=True).shape[0]
    n_test_batches = test_set_X.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    index = T.lscalar()         # index to batches

    X = T.matrix("X")
    y = T.ivector("y")

    print("Building the model...")

    # now we construct a 4-layer CNN

    # our inputs are 28*28 images with only one feature map, so we
    # reshape it to (batch_size, 1, 28, 28)
    layer0_input = X.reshape((batch_size, 1, 28, 28))

    # layer0: convolution+max-pooling layer
    layer0 = layers.ConvPoolLayer(
        rng=rng,
        input=layer0_input,
        input_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # layer1: convolution+max-pooling layer
    layer1 = layers.ConvPoolLayer(
        rng=rng,
        input=layer0.output,
        input_shape=layer0.output_shape,
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # layer2: fully-connected hidden layer
    layer2 = layers.MLPLayer(
        rng=rng,
        input=layer1.output.flatten(2),
        n_in=np.prod(layer1.output_shape[1:]),
        n_out=layer1.output_shape[0],
        activation=T.tanh
    )

    # layer3: logistic regression
    layer3 = layers.LogisticRegression(input=layer2.output,
                                       n_in=batch_size, n_out=10)

    cost = layer3.negative_log_likelihood(y)

    # construct functions to compute errors on test/validation sets
    valid_error = theano.function(
        [index],
        layer3.errors(y),
        givens={
            X: valid_set_X[index*batch_size:(index+1)*batch_size],
            y: valid_set_y[index*batch_size:(index+1)*batch_size]
        }
    )

    test_error = theano.function(
        [index],
        layer3.errors(y),
        givens={
            X: test_set_X[index*batch_size:(index+1)*batch_size],
            y: test_set_y[index*batch_size:(index+1)*batch_size]
        }
    )

    # a list of all parameters in this model
    params = layer0.params + layer1.params + layer2.params + layer3.params

    grads = T.grad(cost, params)

    # parameter update rule in stochastic gradient descent
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            X: train_set_X[index*batch_size:(index+1)*batch_size],
            y: train_set_y[index*batch_size:(index+1)*batch_size]
        }
    )

    predict_model = theano.function([X], layer3.output)

    print("Training...")

    # we use the early-stopping strategy
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_score = 0.
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done = False

    while (epoch < n_epochs) and (not done):
        epoch += 1
        for minibatch_index in range(n_train_batches):
            iter = (epoch-1) * n_train_batches + minibatch_index
            if iter % 100 == 0:
                print("iter =", iter)

            train_model(minibatch_index)

            if (iter+1) % validation_frequency == 0:
                valid_errors = [valid_error(i)
                                for i in range(n_valid_batches)]
                score = 1 - np.mean(valid_errors)
                print('epoch {}, minibatch {}/{}, validation accuracy {}'
                      .format(epoch, minibatch_index + 1,
                              n_train_batches, score))
                if score > best_validation_score:
                    # increase patience if improvement is large enough
                    # (compare against the previous best before overwriting it)
                    if (1-score) < \
                       (1-best_validation_score) * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_score = score
                    best_iter = iter

                    # test it on test set
                    test_errors = [test_error(i)
                                   for i in range(n_test_batches)]
                    test_score = 1 - np.mean(test_errors)
                    print('    test score:', test_score)

                    # store best model to file
                    with open('tmp/best_cnn.pkl', 'wb') as f:
                        pickle.dump((predict_model, batch_size), f)

            if patience <= iter:
                done = True
                break   # break the batches loop

    end_time = timeit.default_timer()
    print('Finished training. Total time:',
          (end_time - start_time) / 60, 'min')
    print('Best validation score of', best_validation_score,
          'obtained at iter', best_iter)
    print('Test accuracy: ', test_score)
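
# Hedged usage sketch: reload the predictor pickled above and run it on one batch of
# MNIST test images. Assumes utils.load_MNIST and the file written by evaluate_lenet
# exist; what the returned values mean depends on LogisticRegression.output.
def predict_with_saved_cnn(path='tmp/best_cnn.pkl'):
    import pickle
    import utils
    with open(path, 'rb') as f:
        predict_model, batch_size = pickle.load(f)
    test_X, test_y = utils.load_MNIST()[2]
    return predict_model(test_X[:batch_size])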
Example #25
def train(learning_rate=0.1, n_epochs=100, batch_size=320, batch_type = 'fast',
                    mynet = 'one', representation='raw', momentum=0, history=0):

    rng = numpy.random.RandomState(42)

    trainP = 0.8
    validP = 0.1
    testP  = 0.1   
    
#    print "... Reading cached values ..."
#    (trainCumLengths,validCumLengths,testCumLengths,filenames) = pickle.load(open("results/5x5.cache",'r'))
    
    print "... Getting filenames ..."
    datasetMY = "../MC player/20kgames9"
    fn1 = readGame.getFilenames(datasetMY,1,0,1)[0]
    random.shuffle(fn1)    
    filenames = fn1
    n = len(filenames)
    print "... Learning set contains " + str(n) + " games"
    
    print "... Computing cumulative game lengths ..."
    trainNames = filenames[:int(trainP*n)]
    validNames = filenames[int(trainP*n):int(trainP*n+validP*n)]
    testNames  = filenames[int(trainP*n+validP*n):int(trainP*n+validP*n+testP*n)]
    
    random.shuffle(trainNames)
    
    trainCumLengths = readGame.getCumGameLengths(trainNames,ftype="game")
    validCumLengths = readGame.getCumGameLengths(validNames,ftype="game")
    testCumLengths = readGame.getCumGameLengths(testNames,ftype="game")
    
    fw = open("results/"+str(gs)+"x"+str(gs)+".cache","wb")
    pickle.dump((trainCumLengths,validCumLengths,testCumLengths,filenames),fw)
    fw.close()
    print "... Preprocessing initial batches ..."
    minn = batch_size / 10 +1
    temp = time.time()
    test_batch_x, test_batch_y = utils.shared_dataset(readGame.processGAMEs(testNames[:minn],representation,gs=gs),batch_size=batch_size,board_size=gs)
    train_batch_x, train_batch_y = utils.shared_dataset(readGame.processGAMEs(trainNames[:minn],representation,gs=gs),batch_size=batch_size,board_size=gs)
    valid_batch_x, valid_batch_y = utils.shared_dataset(readGame.processGAMEs(validNames[:minn],representation,gs=gs),batch_size=batch_size,board_size=gs)
    print "    average processing time per game: " + str((time.time()-temp)/18.0) + " seconds, per epoch: " + str(int((time.time()-temp)/18*n/60/60)) + " hours" 

    # compute number of minibatches for training, validation and testing
    n_train_batches = trainCumLengths[-1]
    n_valid_batches = validCumLengths[-1]
    n_test_batches =  testCumLengths[-1]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    iteration = T.lscalar()  # iteration number of a minibatch
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ishape = (gs, gs)  # shape of the go board input (gs is assumed to be defined at module level)

    fw = open("results/"+mynet+"_"+str(learning_rate)+"_"+".res","w")
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... Building the model ...'
   
    nc = 2 if representation=='raw' else 6  # 2 feature planes for 'raw' boards, 6 otherwise
    nc *= 1+history



       
    if mynet == "zero":
        layer0_input = x.reshape((batch_size, nc, gs, gs))
        layer0 = LogisticRegression(input=layer0_input.flatten(2), n_in=nc*gs*gs, n_out=gs*gs)
        cost = layer0.negative_log_likelihood(y)
    
        params = layer0.params

    if mynet == "one":
        nHiddens = 500
        layer1_input = x.reshape((batch_size, nc, gs, gs))
        layer1 = HiddenLayer(rng, input=layer1_input.flatten(2), n_in=nc * gs * gs,
                           n_out=nHiddens, activation=T.tanh)
        layer0 = LogisticRegression(input=layer1.output, n_in=nHiddens, n_out=gs*gs)
        cost = layer0.negative_log_likelihood(y)
    
        params = layer0.params + layer1.params
        
    # create a function to compute the mistakes that are made by the model
    test_model = theano.function([], layer0.errors(y),
             givens={
                x: test_batch_x,
                y: T.cast(test_batch_y, 'int32')})

    validate_model = theano.function([], layer0.errors(y),
             givens={
                x: valid_batch_x,
                y: T.cast(valid_batch_y, 'int32')})

    predictions = theano.function([], layer0.get_predictions(),
            givens={
                x: valid_batch_x})
                
    conditional_dist = theano.function([], layer0.get_conditional_dist(),
            givens={
                x: valid_batch_x})

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i],grads[i]) pairs.
    updates = []
    #adjusted_rate = learning_rate - iteration*(learning_rate/(float(n_epochs) * n_train_batches))
    # NOTE: a Python conditional on the symbolic T.lt() is always truthy, so the decayed
    # branch would never be taken; T.switch builds the intended symbolic schedule instead.
    adjusted_rate = T.switch(T.lt(iteration, 3000*200), learning_rate, 0.1*learning_rate)
    
    for param_i, grad_i in zip(params, grads):#, prev_grad_i   , prevGrads):
        updates.append((param_i, param_i - adjusted_rate * grad_i))# - momentum * prev_grad_i))
    
    #for i,grad in enumerate(grads):
    #    updates.append((prevGrads[i], grad))
    
    train_model = theano.function([iteration], cost, updates=updates,
         givens={
            x: train_batch_x,
            y: T.cast(train_batch_y, 'int32')},on_unused_input='ignore')

    ###############
    # TRAIN MODEL #
    ###############
    print '... Training ...'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.999  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = 2000         # min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False
    stime = time.time()

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 500 == 0:
                print 'training @ iter = ', iter
                pickle.dump((updates,cost,layer0,test_model,predictions,conditional_dist),open("results/"+str(batch_size)+representation+str(history)+".model","w"))
            if iter ==5:
                print 'estimated train time per epoch = '+ str((time.time() - stime) * n_train_batches/60.0/iter/60.0) + " hours"
            ax,ay = getBatch(trainNames, minibatch_index, trainCumLengths, batch_size,representation,batchType=batch_type,history=history)
            train_batch_x.set_value(ax)
            train_batch_y.set_value(ay)
            cost_ij = train_model(iter)

            if (iter + 1) % validation_frequency == 0 or iter==5:

                # compute zero-one loss on validation set
                validation_losses = []
                for i in xrange(n_valid_batches):
                    vx,vy = getBatch(validNames, i, validCumLengths, batch_size,representation,batchType='fast',history=history)
                    valid_batch_x.set_value(vx)
                    valid_batch_y.set_value(vy)
                    validation_losses.append(validate_model())
                this_validation_loss = numpy.mean(validation_losses)
        
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses=[]
                    for i in xrange(n_test_batches):
                        tx,ty = getBatch(testNames, i, testCumLengths, batch_size,representation,batchType='fast',history=history)
                        test_batch_x.set_value(tx)
                        test_batch_y.set_value(ty)
                        test_losses.append(test_model())
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

        #fw.write("Epoch "+str(epoch) + ": " +str((1-this_validation_loss)*100.)+ "%\n")
        pickle.dump((updates,cost,layer0,test_model,predictions,conditional_dist),open("results/"+str(batch_size)+representation+str(history)+".model","w"))
        
            #if patience <= iter:
            #    done_looping = True
            #    break

    fw.close()
    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,'\
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))