Example #1
	def testSoftmax(self):
		'''
		Using Softmax classifier
		'''
		input = T.matrix(name='input')
		truth = T.ivector(name='label')
		learning_rate = T.scalar(name='learning rate')
		num_in, num_out = 50, 8
		softmax = SoftmaxLayer(input, (num_in, num_out))
		lambdas = 1e-5
		cost = softmax.NLL_loss(truth) + lambdas * softmax.L2_loss()
		params = softmax.params
		gradparams = T.grad(cost, params)
		updates = []
		for param, gradparam in zip(params, gradparams):
			updates.append((param, param-learning_rate*gradparam))
		obj = theano.function(inputs=[input, truth, learning_rate], outputs=cost, updates=updates)
		# Training
		nepoch = 5000
		start_time = time.time()
		for i in xrange(nepoch):
			rate = 2.0 / (1.0 + i/500)
			func_value = obj(self.snippet_train_set, self.snippet_train_label, rate)		
			prediction = softmax.predict(self.snippet_train_set)
			accuracy = np.sum(prediction == self.snippet_train_label) / float(self.snippet_train_label.shape[0])
			pprint('epoch %d, cost = %f, accuracy = %f' % (i, func_value, accuracy))
		end_time = time.time()
		pprint('Time used to train the softmax classifier: %f minutes.' % ((end_time-start_time)/60))
		# Test
		test_size = self.snippet_test_label.shape[0]
		prediction = softmax.predict(self.snippet_test_set)
		accuracy = np.sum(prediction == self.snippet_test_label) / float(test_size)
		pprint('Test accuracy: %f' % accuracy)
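These examples exercise a SoftmaxLayer through four members only: params, NLL_loss, L2_loss and the compiled predict function; the layer itself is defined elsewhere in the project. For orientation, here is a minimal sketch of a layer exposing that interface. The zero weight initialization and the exact loss definitions are assumptions for illustration, not the repository's actual implementation.

import numpy as np
import theano
import theano.tensor as T

floatX = theano.config.floatX

class SoftmaxLayer(object):
    # Minimal sketch: a single softmax (multinomial logistic regression) layer.
    def __init__(self, input, shape):
        num_in, num_out = shape
        self.W = theano.shared(np.zeros((num_in, num_out), dtype=floatX), name='W')
        self.b = theano.shared(np.zeros(num_out, dtype=floatX), name='b')
        self.params = [self.W, self.b]
        self.output = T.nnet.softmax(T.dot(input, self.W) + self.b)
        self.pred = T.argmax(self.output, axis=1)
        # Compiled so that softmax.predict(X) can be called on numpy arrays,
        # as in the training loops above.
        self.predict = theano.function(inputs=[input], outputs=self.pred)

    def NLL_loss(self, truth):
        # Mean negative log-likelihood of the ground-truth classes
        return -T.mean(T.log(self.output)[T.arange(truth.shape[0]), truth])

    def L2_loss(self):
        # L2 penalty on the weight matrix
        return T.sum(self.W ** 2)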
Example #2
	def testSoftmax(self):
		'''
		Sentiment analysis task for sentence representation using a
		softmax classifier.
		'''
		input = T.matrix(name='input')
		label = T.ivector(name='label')
		learning_rate = T.scalar(name='learning rate')
		num_in, num_out = 50, 2
		softmax = SoftmaxLayer(input, (num_in, num_out))
		lambdas = 1e-5
		# cost = softmax.NLL_loss(label) + lambdas * softmax.L2_loss()
		cost = softmax.NLL_loss(label)
		params = softmax.params
		gradparams = T.grad(cost, params)
		updates = []
		for param, gradparam in zip(params, gradparams):
			updates.append((param, param-learning_rate*gradparam))
		objective = theano.function(inputs=[input, label, learning_rate], outputs=cost, updates=updates)
		# Training
		nepoch = 5000
		start_time = time.time() 
		for i in xrange(nepoch):
			rate = 2.0 / ((1.0 + i/500) ** 2)
			func_value = objective(self.senti_train_set, self.senti_train_label, rate)
			prediction = softmax.predict(self.senti_train_set)
			accuracy = np.sum(prediction == self.senti_train_label) / float(self.train_size)
			pprint('epoch %d, cost = %f, accuracy = %f' % (i, func_value, accuracy))
		end_time = time.time()
		pprint('Time used to train the softmax classifier: %f minutes' % ((end_time-start_time)/60))
		# Test
		prediction = softmax.predict(self.senti_test_set)
		accuracy = np.sum(prediction == self.senti_test_label) / float(self.test_size)
		pprint('Test accuracy: %f' % accuracy)
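A detail worth noting about the learning-rate schedules above: under Python 2 integer division, i/500 evaluates to 0, 1, 2, ..., so both rate = 2.0 / (1.0 + i/500) in Example #1 and the squared form here decay as a staircase, dropping once every 500 epochs rather than smoothly. A quick check of the squared schedule (illustrative only):

# Staircase decay produced by integer division (i // 500 in Python 3 terms).
for i in (0, 499, 500, 1000, 4999):
    rate = 2.0 / ((1.0 + i // 500) ** 2)
    print(i, rate)
# epochs 0-499    -> 2.0
# epochs 500-999  -> 0.5
# epochs 1000-1499 -> 0.2222...
# epoch  4999     -> 0.02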
Example #3
    def testSoftmax(self):
        '''
        Sentiment analysis task for sentence representation using a
        softmax classifier.
        '''
        input = T.matrix(name='input')
        label = T.ivector(name='label')
        learning_rate = T.scalar(name='learning rate')
        num_in, num_out = 50, 2
        softmax = SoftmaxLayer(input, (num_in, num_out))
        lambdas = 1e-5
        # cost = softmax.NLL_loss(label) + lambdas * softmax.L2_loss()
        cost = softmax.NLL_loss(label)
        params = softmax.params
        gradparams = T.grad(cost, params)
        updates = []
        for param, gradparam in zip(params, gradparams):
            updates.append((param, param - learning_rate * gradparam))
        objective = theano.function(inputs=[input, label, learning_rate],
                                    outputs=cost,
                                    updates=updates)
        # Training
        nepoch = 5000
        start_time = time.time()
        for i in xrange(nepoch):
            rate = 2.0 / ((1.0 + i / 500)**2)
            func_value = objective(self.senti_train_set,
                                   self.senti_train_label, rate)
            prediction = softmax.predict(self.senti_train_set)
            accuracy = np.sum(prediction == self.senti_train_label) / float(
                self.train_size)
            pprint('epoch %d, cost = %f, accuracy = %f' %
                   (i, func_value, accuracy))
        end_time = time.time()
        pprint('Time used to train the softmax classifier: %f minutes' %
               ((end_time - start_time) / 60))
        # Test
        prediction = softmax.predict(self.senti_test_set)
        accuracy = np.sum(prediction == self.senti_test_label) / float(
            self.test_size)
        pprint('Test accuracy: %f' % accuracy)
Example #4
    def testSoftmax(self):
        '''
        Using Softmax classifier
        '''
        input = T.matrix(name='input')
        truth = T.ivector(name='label')
        learning_rate = T.scalar(name='learning rate')
        num_in, num_out = 50, 8
        softmax = SoftmaxLayer(input, (num_in, num_out))
        lambdas = 1e-5
        cost = softmax.NLL_loss(truth) + lambdas * softmax.L2_loss()
        params = softmax.params
        gradparams = T.grad(cost, params)
        updates = []
        for param, gradparam in zip(params, gradparams):
            updates.append((param, param - learning_rate * gradparam))
        obj = theano.function(inputs=[input, truth, learning_rate],
                              outputs=cost,
                              updates=updates)
        # Training
        nepoch = 5000
        start_time = time.time()
        for i in xrange(nepoch):
            rate = 2.0 / (1.0 + i / 500)
            func_value = obj(self.snippet_train_set, self.snippet_train_label,
                             rate)
            prediction = softmax.predict(self.snippet_train_set)
            accuracy = np.sum(prediction == self.snippet_train_label) / float(
                self.snippet_train_label.shape[0])
            pprint('epoch %d, cost = %f, accuracy = %f' %
                   (i, func_value, accuracy))
        end_time = time.time()
        pprint('Time used to train the softmax classifier: %f minutes.' %
               ((end_time - start_time) / 60))
        # Test
        test_size = self.snippet_test_label.shape[0]
        prediction = softmax.predict(self.snippet_test_set)
        accuracy = np.sum(
            prediction == self.snippet_test_label) / float(test_size)
        pprint('Test accuracy: %f' % accuracy)
Example #5
    def __init__(self, configs=None, verbose=True):
        '''
        @configs: CNNConfiger. Configuration object used to set the architecture of the CNN.
        '''
        if verbose: pprint("Building Convolutional Neural Network...")
        # Make theano symbolic tensor for input and ground truth label
        self.input = T.tensor4(name='input', dtype=floatX)
        self.truth = T.ivector(name='label')
        self.learn_rate = T.scalar(name='learn rate')
        self.batch_size = configs.batch_size
        self.image_row = configs.image_row
        self.image_col = configs.image_col
        # There may be multiple convolution-pooling stages and multilayer-perceptron layers.
        self.convpool_layers = []
        self.hidden_layers = []
        self.softmax_layers = []
        # Configure activation function
        self.act = Activation(configs.activation)
        # Configuration should be valid
        assert configs.num_convpool == len(configs.convs)
        assert configs.num_convpool == len(configs.pools)
        assert configs.num_hidden == len(configs.hiddens)
        assert configs.num_softmax == len(configs.softmaxs)
        # Construct random number generator
        srng = T.shared_randomstreams.RandomStreams(configs.random_seed)
        # Build architecture of CNN
        # Convolution and Pooling layers
        image_shapes, filter_shapes = [], []
        for i in xrange(configs.num_convpool):
            if i == 0:
                image_shapes.append(
                    (self.batch_size, 1, self.image_row, self.image_col))
                filter_shapes.append(
                    (configs.convs[i][0], 1, configs.convs[i][1],
                     configs.convs[i][2]))
            else:
                image_shapes.append(
                    (self.batch_size, configs.convs[i - 1][0],
                     (image_shapes[i - 1][2] - configs.convs[i - 1][1] + 1) /
                     configs.pools[i - 1][0],
                     (image_shapes[i - 1][3] - configs.convs[i - 1][2] + 1) /
                     configs.pools[i - 1][1]))
                filter_shapes.append(
                    (configs.convs[i][0], configs.convs[i - 1][0],
                     configs.convs[i][1], configs.convs[i][2]))
        for i in xrange(configs.num_convpool):
            if i == 0:
                current_input = self.input
            else:
                current_input = self.convpool_layers[i - 1].output
            self.convpool_layers.append(
                LeNetConvPoolLayer(input=current_input,
                                   filter_shape=filter_shapes[i],
                                   image_shape=image_shapes[i],
                                   poolsize=configs.pools[i],
                                   act=self.act))
        # Multilayer perceptron layers
        for i in xrange(configs.num_hidden):
            if i == 0:
                current_input = T.flatten(
                    self.convpool_layers[configs.num_convpool - 1].output, 2)
            else:
                current_input = self.hidden_layers[i - 1].output
            # Adding dropout to hidden layers
            hidden_layer = HiddenLayer(current_input,
                                       configs.hiddens[i],
                                       act=self.act)
            mask = srng.binomial(n=1,
                                 p=1 - configs.dropout,
                                 size=hidden_layer.output.shape)
            hidden_layer.output *= T.cast(mask, floatX)
            self.hidden_layers.append(hidden_layer)
        # Softmax layer; in most cases the architecture contains only one softmax layer
        for i in xrange(configs.num_softmax):
            if i == 0:
                current_input = self.hidden_layers[configs.num_hidden -
                                                   1].output
            else:
                current_input = self.softmax_layers[i - 1].output
            self.softmax_layers.append(
                SoftmaxLayer(current_input, configs.softmaxs[i]))
        # Output
        self.pred = self.softmax_layers[configs.num_softmax - 1].prediction()
        # Cost function with ground truth provided
        self.cost = self.softmax_layers[configs.num_softmax - 1].NLL_loss(
            self.truth)
        # Build cost function
        # Stack all the parameters
        self.params = []
        for convpool_layer in self.convpool_layers:
            self.params.extend(convpool_layer.params)
        for hidden_layer in self.hidden_layers:
            self.params.extend(hidden_layer.params)
        for softmax_layer in self.softmax_layers:
            self.params.extend(softmax_layer.params)
        # Compute gradient of self.cost with respect to network parameters
        self.gradparams = T.grad(self.cost, self.params)
        # Stochastic gradient descent learning algorithm
        self.updates = []
        for param, gradparam in zip(self.params, self.gradparams):
            self.updates.append((param, param - self.learn_rate * gradparam))
        # Build objective function
        self.objective = theano.function(
            inputs=[self.input, self.truth, self.learn_rate],
            outputs=self.cost,
            updates=self.updates)
        # Build prediction function
        self.predict = theano.function(inputs=[self.input], outputs=self.pred)
        if verbose:
            pprint('Architecture building finished, summarized below:')
            pprint(
                'There are %d layers (not counting the input layer) altogether:'
                % (configs.num_convpool * 2 + configs.num_hidden +
                   configs.num_softmax))
            pprint('%d convolution layers + %d maxpooling layers.' %
                   (len(self.convpool_layers), len(self.convpool_layers)))
            pprint('%d hidden layers.' % (len(self.hidden_layers)))
            pprint('%d softmax layers.' % (len(self.softmax_layers)))
            pprint('=' * 50)
            pprint('Detailed architecture of each layer: ')
            pprint('-' * 50)
            pprint('Convolution and Pooling layers: ')
            for i in xrange(len(self.convpool_layers)):
                pprint('Convolution Layer %d: ' % i)
                pprint(
                    '%d feature maps, each has a filter kernel with size (%d, %d)'
                    % (configs.convs[i][0], configs.convs[i][1],
                       configs.convs[i][2]))
            pprint('-' * 50)
            pprint('Hidden layers: ')
            for i in xrange(len(self.hidden_layers)):
                pprint('Hidden Layer %d: ' % i)
                pprint('Input dimension: %d, Output dimension: %d' %
                       (configs.hiddens[i][0], configs.hiddens[i][1]))
            pprint('-' * 50)
            pprint('Softmax layers: ')
            for i in xrange(len(self.softmax_layers)):
                pprint('Softmax Layer %d: ' % i)
                pprint('Input dimension: %d, Output dimension: %d' %
                       (configs.softmaxs[i][0], configs.softmaxs[i][1]))
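The image_shapes / filter_shapes bookkeeping in the constructor above assumes 'valid' convolution followed by non-overlapping max-pooling, so each stage maps a feature map of height h to (h - filter_h + 1) / pool_h, and likewise for the width. A quick numeric check with made-up sizes; these numbers are illustrative and do not come from any CNNConfiger:

# Feature-map size after one conv ('valid') + max-pool stage, matching the
# arithmetic used to build image_shapes above.
def convpool_output_size(image_hw, filter_hw, pool_hw):
    return ((image_hw[0] - filter_hw[0] + 1) // pool_hw[0],
            (image_hw[1] - filter_hw[1] + 1) // pool_hw[1])

# e.g. a 28x28 input, 5x5 filters and 2x2 pooling give 12x12 maps,
# then another 5x5 conv and 2x2 pooling give 4x4 maps.
print(convpool_output_size((28, 28), (5, 5), (2, 2)))  # (12, 12)
print(convpool_output_size((12, 12), (5, 5), (2, 2)))  # (4, 4)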
Example #6
    def testSoftmaxWithRaw(self):
        '''
        Sentiment analysis task with a softmax classifier on raw bag-of-words input.
        '''
        pprint('In testSoftmaxWithRaw...')
        input = T.matrix(name='input')
        label = T.ivector(name='label')
        learning_rate = T.scalar(name='learning rate')
        num_in, num_out = self.word_embedding.dict_size(), 2
        softmax = SoftmaxLayer(input, (num_in, num_out))
        lambdas = 1e-5
        # cost = softmax.NLL_loss(label) + lambdas * softmax.L2_loss()
        cost = softmax.NLL_loss(label)
        params = softmax.params
        gradparams = T.grad(cost, params)
        updates = []
        for param, gradparam in zip(params, gradparams):
            updates.append((param, param - learning_rate * gradparam))
        objective = theano.function(inputs=[input, label, learning_rate],
                                    outputs=cost,
                                    updates=updates)
        # Training
        nepoch = 4000
        start_time = time.time()
        # Create sparse representation of input matrix
        train_batch_sparse = lil_matrix(
            (self.word_embedding.dict_size(), self.train_size), dtype=floatX)
        test_batch_sparse = lil_matrix(
            (self.word_embedding.dict_size(), self.test_size), dtype=floatX)
        # Build sparse training matrix
        for i, sent in enumerate(self.senti_train_txt):
            words = sent.split()
            words = [word.lower() for word in words]
            # Create sparse representation
            tmp_indices = [
                self.word_embedding.word2index(word) for word in words
            ]
            tmp_counts = {
                ind: tmp_indices.count(ind)
                for ind in set(tmp_indices)
            }
            # Incremental updating
            train_batch_sparse[tmp_counts.keys(),
                               i] = np.asarray(tmp_counts.values())[:,
                                                                    np.newaxis]
            train_batch_sparse[tmp_counts.keys(), i] /= len(words)
        # Build sparse test matrix
        for i, sent in enumerate(self.senti_test_txt):
            words = sent.split()
            words = [word.lower() for word in words]
            # Create sparse representation
            tmp_indices = [
                self.word_embedding.word2index(word) for word in words
            ]
            tmp_counts = {
                ind: tmp_indices.count(ind)
                for ind in set(tmp_indices)
            }
            # Incremental updating
            test_batch_sparse[tmp_counts.keys(),
                              i] = np.asarray(tmp_counts.values())[:,
                                                                   np.newaxis]
            test_batch_sparse[tmp_counts.keys(), i] /= len(words)
        # Convert to a dense matrix and transpose so each row is one sentence
        train_batch_sparse = train_batch_sparse.todense().T
        test_batch_sparse = test_batch_sparse.todense().T
        end_time = time.time()
        pprint('Time used to build the sparse input matrix: %f seconds.' %
               (end_time - start_time))
        start_time = time.time()
        # Epoch training
        for i in xrange(nepoch):
            rate = 2.5 / (1.0 + i / 500)
            # Training
            func_value = objective(train_batch_sparse, self.senti_train_label,
                                   rate)
            prediction = softmax.predict(train_batch_sparse)
            accuracy = np.sum(prediction == self.senti_train_label) / float(
                self.train_size)
            pprint('Epoch: %d, total cost: %f, training accuracy: %f' %
                   (i, func_value, accuracy))
        end_time = time.time()
        pprint(
            'Time used to train the softmax classifier on raw input: %f minutes'
            % ((end_time - start_time) / 60))
        # Evaluation on test set
        prediction = softmax.predict(test_batch_sparse)
        accuracy = np.sum(prediction == self.senti_test_label) / float(
            self.test_size)
        pprint('Test accuracy: %f' % accuracy)
        with gzip.GzipFile('raw-softmax.sent.gz', 'wb') as fout:
            cPickle.dump(softmax, fout)
        pprint('model saved: raw-softmax.sent.gz')
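The two loops above turn each sentence into a vocabulary-sized, length-normalized bag-of-words column. The same representation for a single sentence, written without the sparse-matrix bookkeeping; the toy vocabulary and the word2index mapping below are made up for illustration:

import numpy as np
from collections import Counter

def bow_vector(sentence, word2index, dict_size):
    # Normalized term-frequency vector, matching the per-column construction
    # of train_batch_sparse / test_batch_sparse above.
    words = [w.lower() for w in sentence.split()]
    vec = np.zeros(dict_size, dtype='float32')
    for index, count in Counter(word2index(w) for w in words).items():
        vec[index] = count / float(len(words))
    return vec

# Toy example with a 4-word vocabulary
vocab = {'good': 0, 'bad': 1, 'movie': 2, 'very': 3}
print(bow_vector('very very good movie', vocab.get, len(vocab)))
# -> [0.25  0.    0.25  0.5 ]   (good=1/4, bad=0, movie=1/4, very=2/4)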
Example #7
    def testSoftmaxWithFineTuning(self):
        '''
        Sentiment analysis task with a softmax classifier and fine-tuning of the word embeddings.
        '''
        input = T.matrix(name='input')
        label = T.ivector(name='label')
        learning_rate = T.scalar(name='learning rate')
        num_in, num_out = 50, 2
        softmax = SoftmaxLayer(input, (num_in, num_out))
        lambdas = 1e-5
        # cost = softmax.NLL_loss(label) + lambdas * softmax.L2_loss()
        cost = softmax.NLL_loss(label)
        params = softmax.params
        gradparams = T.grad(cost, params)
        updates = []
        for param, gradparam in zip(params, gradparams):
            updates.append((param, param - learning_rate * gradparam))
        objective = theano.function(inputs=[input, label, learning_rate],
                                    outputs=cost,
                                    updates=updates)
        grad_to_input = T.grad(cost, input)
        compute_grad_to_input = theano.function(inputs=[input, label],
                                                outputs=grad_to_input)
        # Training
        nepoch = 4000
        start_time = time.time()
        # Create sparse representation of input matrix
        train_batch_sparse = lil_matrix(
            (self.word_embedding.dict_size(), self.train_size), dtype=floatX)
        for i, sent in enumerate(self.senti_train_txt):
            words = sent.split()
            words = [word.lower() for word in words]
            # Create sparse representation
            tmp_indices = [
                self.word_embedding.word2index(word) for word in words
            ]
            tmp_counts = {
                ind: tmp_indices.count(ind)
                for ind in set(tmp_indices)
            }
            # Incremental updating
            train_batch_sparse[tmp_counts.keys(),
                               i] = np.asarray(tmp_counts.values())[:,
                                                                    np.newaxis]
            train_batch_sparse[tmp_counts.keys(), i] /= len(words)
        # Convert to csc-based for fast matrix-matrix multiplication
        train_batch_sparse = csc_matrix(train_batch_sparse)
        end_time = time.time()
        pprint('Time used to build the sparse input matrix: %f seconds.' %
               (end_time - start_time))
        start_time = time.time()
        # Epoch training
        for i in xrange(nepoch):
            rate = 2.0 / (1.0 + i / 500)
            # Dynamically use current version of word-embedding matrix
            train_batch = train_batch_sparse.T.dot(
                self.word_embedding.embedding)
            # Training
            func_value = objective(train_batch, self.senti_train_label, rate)
            prediction = softmax.predict(train_batch)
            accuracy = np.sum(prediction == self.senti_train_label) / float(
                self.train_size)
            pprint('Epoch: %d, total cost: %f, training accuracy: %f' %
                   (i, func_value, accuracy))
            # Fine-tuning the word-embedding matrix
            train_batch_grad = compute_grad_to_input(train_batch,
                                                     self.senti_train_label)
            self.word_embedding._embedding -= rate * train_batch_sparse.dot(
                train_batch_grad)
            if (i + 1) % 100 == 0:
                # Test
                pprint('-' * 50)
                test_batch = np.zeros(
                    (self.test_size, self.word_embedding.embedding_dim()),
                    dtype=floatX)
                for j, sent in enumerate(self.senti_test_txt):
                    words = sent.split()
                    words = [word.lower() for word in words]
                    vectors = np.asarray(
                        [self.word_embedding.wordvec(word) for word in words])
                    test_batch[j, :] = np.mean(vectors, axis=0)
                # Evaluation on test set
                prediction = softmax.predict(test_batch)
                accuracy = np.sum(prediction == self.senti_test_label) / float(
                    self.test_size)
                pprint('Test accuracy: %f' % accuracy)
                pprint('-' * 50)
        end_time = time.time()
        pprint(
            'Time used to train the softmax classifier with fine-tuning: %f minutes'
            % ((end_time - start_time) / 60))
        # Test
        test_batch = np.zeros(
            (self.test_size, self.word_embedding.embedding_dim()),
            dtype=floatX)
        for i, sent in enumerate(self.senti_test_txt):
            words = sent.split()
            words = [word.lower() for word in words]
            vectors = np.asarray(
                [self.word_embedding.wordvec(word) for word in words])
            test_batch[i, :] = np.mean(vectors, axis=0)
        # Evaluation on test set
        prediction = softmax.predict(test_batch)
        accuracy = np.sum(prediction == self.senti_test_label) / float(
            self.test_size)
        pprint('Test accuracy: %f' % accuracy)
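The fine-tuning step above is just the chain rule written with the sparse matrix: the batch is built as X = S^T * E, where S is the (dict_size x train_size) bag-of-words matrix and E is the embedding matrix, so the gradient of the cost with respect to E is S * (dCost/dX), which is exactly train_batch_sparse.dot(train_batch_grad). A small numpy check of that identity with made-up shapes and a simple linear cost:

import numpy as np

rng = np.random.RandomState(0)
dict_size, emb_dim, batch = 6, 3, 4
S = rng.rand(dict_size, batch)   # bag-of-words matrix (dense here for simplicity)
E = rng.rand(dict_size, emb_dim) # embedding matrix
C = rng.rand(batch, emb_dim)     # stands in for dCost/dX from compute_grad_to_input

# cost(X) = sum(X * C) with X = S.T.dot(E)  =>  dcost/dE = S.dot(C)
grad_E = S.dot(C)

# Finite-difference check of a single entry of E
eps = 1e-6
E_perturbed = E.copy()
E_perturbed[2, 1] += eps
numerical = (np.sum(S.T.dot(E_perturbed) * C) - np.sum(S.T.dot(E) * C)) / eps
print(np.allclose(numerical, grad_E[2, 1]))  # True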
Example #8
	def testSoftmaxWithRaw(self):
		'''
		Sentiment analysis task with a softmax classifier on raw bag-of-words input.
		'''
		pprint('In testSoftmaxWithRaw...')
		input = T.matrix(name='input')
		label = T.ivector(name='label')
		learning_rate = T.scalar(name='learning rate')
		num_in, num_out = self.word_embedding.dict_size(), 2
		softmax = SoftmaxLayer(input, (num_in, num_out))
		lambdas = 1e-5
		# cost = softmax.NLL_loss(label) + lambdas * softmax.L2_loss()
		cost = softmax.NLL_loss(label)
		params = softmax.params
		gradparams = T.grad(cost, params)
		updates = []
		for param, gradparam in zip(params, gradparams):
			updates.append((param, param-learning_rate*gradparam))
		objective = theano.function(inputs=[input, label, learning_rate], outputs=cost, updates=updates)
		# Training
		nepoch = 4000
		start_time = time.time() 
		# Create sparse representation of input matrix
		train_batch_sparse = lil_matrix((self.word_embedding.dict_size(), self.train_size), dtype=floatX)
		test_batch_sparse = lil_matrix((self.word_embedding.dict_size(), self.test_size), dtype=floatX)
		# Build sparse training matrix
		for i, sent in enumerate(self.senti_train_txt):
			words = sent.split()
			words = [word.lower() for word in words]
			# Create sparse representation
			tmp_indices = [self.word_embedding.word2index(word) for word in words]
			tmp_counts = {ind : tmp_indices.count(ind) for ind in set(tmp_indices)}
			# Incremental updating
			train_batch_sparse[tmp_counts.keys(), i] = np.asarray(tmp_counts.values())[:, np.newaxis]
			train_batch_sparse[tmp_counts.keys(), i] /= len(words)
		# Build sparse test matrix
		for i, sent in enumerate(self.senti_test_txt):
			words = sent.split()
			words = [word.lower() for word in words]
			# Create sparse representation
			tmp_indices = [self.word_embedding.word2index(word) for word in words]
			tmp_counts = {ind : tmp_indices.count(ind) for ind in set(tmp_indices)}
			# Incremental updating
			test_batch_sparse[tmp_counts.keys(), i] = np.asarray(tmp_counts.values())[:, np.newaxis]
			test_batch_sparse[tmp_counts.keys(), i] /= len(words)
		# Convert to a dense matrix and transpose so each row is one sentence
		train_batch_sparse = train_batch_sparse.todense().T
		test_batch_sparse = test_batch_sparse.todense().T
		end_time = time.time()
		pprint('Time used to build the sparse input matrix: %f seconds.' % (end_time-start_time))
		start_time = time.time()
		# Epoch training
		for i in xrange(nepoch):
			rate = 2.5 / (1.0 + i/500)
			# Training
			func_value = objective(train_batch_sparse, self.senti_train_label, rate)
			prediction = softmax.predict(train_batch_sparse)
			accuracy = np.sum(prediction == self.senti_train_label) / float(self.train_size)
			pprint('Epoch: %d, total cost: %f, training accuracy: %f' % (i, func_value, accuracy))
		end_time = time.time()
		pprint('Time used to train the softmax classifier on raw input: %f minutes' % ((end_time-start_time)/60))
		# Evaluation on test set
		prediction = softmax.predict(test_batch_sparse)
		accuracy = np.sum(prediction == self.senti_test_label) / float(self.test_size)
		pprint('Test accuracy: %f' % accuracy)
		with gzip.GzipFile('raw-softmax.sent.gz', 'wb') as fout:
			cPickle.dump(softmax, fout)
		pprint('model saved: raw-softmax.sent.gz')
Example #9
	def testSoftmaxWithFineTuning(self):
		'''
		Sentiment analysis task with a softmax classifier and fine-tuning of the word embeddings.
		'''
		input = T.matrix(name='input')
		label = T.ivector(name='label')
		learning_rate = T.scalar(name='learning rate')
		num_in, num_out = 50, 2
		softmax = SoftmaxLayer(input, (num_in, num_out))
		lambdas = 1e-5
		# cost = softmax.NLL_loss(label) + lambdas * softmax.L2_loss()
		cost = softmax.NLL_loss(label)
		params = softmax.params
		gradparams = T.grad(cost, params)
		updates = []
		for param, gradparam in zip(params, gradparams):
			updates.append((param, param-learning_rate*gradparam))
		objective = theano.function(inputs=[input, label, learning_rate], outputs=cost, updates=updates)
		grad_to_input = T.grad(cost, input)
		compute_grad_to_input = theano.function(inputs=[input, label], outputs=grad_to_input)
		# Training
		nepoch = 4000
		start_time = time.time() 
		# Create sparse representation of input matrix
		train_batch_sparse = lil_matrix((self.word_embedding.dict_size(), self.train_size), dtype=floatX)
		for i, sent in enumerate(self.senti_train_txt):
			words = sent.split()
			words = [word.lower() for word in words]
			# Create sparse representation
			tmp_indices = [self.word_embedding.word2index(word) for word in words]
			tmp_counts = {ind : tmp_indices.count(ind) for ind in set(tmp_indices)}
			# Incremental updating
			train_batch_sparse[tmp_counts.keys(), i] = np.asarray(tmp_counts.values())[:, np.newaxis]
			train_batch_sparse[tmp_counts.keys(), i] /= len(words)
		# Convert to csc-based for fast matrix-matrix multiplication
		train_batch_sparse = csc_matrix(train_batch_sparse)
		end_time = time.time()
		pprint('Time used to build the sparse input matrix: %f seconds.' % (end_time-start_time))
		start_time = time.time()
		# Epoch training
		for i in xrange(nepoch):
			rate = 2.0 / (1.0 + i/500)
			# Dynamically use current version of word-embedding matrix
			train_batch = train_batch_sparse.T.dot(self.word_embedding.embedding)
			# Training
			func_value = objective(train_batch, self.senti_train_label, rate)
			prediction = softmax.predict(train_batch)
			accuracy = np.sum(prediction == self.senti_train_label) / float(self.train_size)
			pprint('Epoch: %d, total cost: %f, training accuracy: %f' % (i, func_value, accuracy))
			# Fine-tuning the word-embedding matrix
			train_batch_grad = compute_grad_to_input(train_batch, self.senti_train_label)
			self.word_embedding._embedding -= rate * train_batch_sparse.dot(train_batch_grad)
			if (i+1) % 100 == 0:
				# Test
				pprint('-' * 50)
				test_batch = np.zeros((self.test_size, self.word_embedding.embedding_dim()), dtype=floatX)
				for i, sent in enumerate(self.senti_test_txt):
					words = sent.split()
					words = [word.lower() for word in words]
					vectors = np.asarray([self.word_embedding.wordvec(word) for word in words])
					test_batch[i, :] = np.mean(vectors, axis=0)
				# Evaluation on test set
				prediction = softmax.predict(test_batch)
				accuracy = np.sum(prediction == self.senti_test_label) / float(self.test_size)
				pprint('Test accuracy: %f' % accuracy)
				pprint('-' * 50)
		end_time = time.time()
		pprint('Time used to train the softmax classifier with fine-tuning: %f minutes' % ((end_time-start_time)/60))
		# Test
		test_batch = np.zeros((self.test_size, self.word_embedding.embedding_dim()), dtype=floatX)
		for i, sent in enumerate(self.senti_test_txt):
			words = sent.split()
			words = [word.lower() for word in words]
			vectors = np.asarray([self.word_embedding.wordvec(word) for word in words])
			test_batch[i, :] = np.mean(vectors, axis=0)
		# Evaluation on test set
		prediction = softmax.predict(test_batch)
		accuracy = np.sum(prediction == self.senti_test_label) / float(self.test_size)
		pprint('Test accuracy: %f' % accuracy)
Example #10
class GrCNN(object):
    '''
    (Binary) Gated Recursive Convolutional Neural Network classifier, with a GrCNN
    encoder and an MLP classifier on top.
    '''
    def __init__(self, config=None, verbose=True):
        self.encoder = GrCNNEncoder(config, verbose)
        # Link two parts
        self.params = self.encoder.params
        self.input = self.encoder.input
        self.hidden = self.encoder.output
        # Activation function
        self.act = Activation(config.activation)
        # MLP Component
        self.hidden_layer = HiddenLayer(self.hidden,
                                        (config.num_hidden, config.num_mlp),
                                        act=Activation(config.hiddenact))
        self.compressed_hidden = self.hidden_layer.output
        # Dropout regularization
        srng = T.shared_randomstreams.RandomStreams(config.random_seed)
        mask = srng.binomial(n=1,
                             p=1 - config.dropout,
                             size=self.compressed_hidden.shape)
        self.compressed_hidden *= T.cast(mask, floatX)
        # Accumulate model parameters
        self.params += self.hidden_layer.params
        # Softmax Component
        self.softmax_layer = SoftmaxLayer(self.compressed_hidden,
                                          (config.num_mlp, config.num_class))
        self.raw_output = self.softmax_layer.output
        self.pred = self.softmax_layer.pred
        self.params += self.softmax_layer.params
        # Compute the total number of parameters in this model
        self.num_params_encoder = config.num_input * config.num_hidden + \
                                  config.num_hidden * config.num_hidden * 2 + \
                                  config.num_hidden + \
                                  config.num_hidden * 3 * 2 + \
                                  3
        self.num_params_classifier = config.num_hidden * config.num_mlp + \
                                     config.num_mlp + \
                                     config.num_mlp * config.num_class + \
                                     config.num_class
        self.num_params = self.num_params_encoder + self.num_params_classifier
        # Build target function
        self.truth = T.ivector(name='label')
        self.learn_rate = T.scalar(name='learning rate')
        self.cost = self.softmax_layer.NLL_loss(self.truth)
        # Build computational graph and compute the gradient of the target
        # function with respect to model parameters
        self.gradparams = T.grad(self.cost, self.params)
        # Updates formula for stochastic gradient descent algorithm
        self.updates = []
        for param, gradparam in zip(self.params, self.gradparams):
            self.updates.append((param, param - self.learn_rate * gradparam))
        # Compile theano function
        self.objective = theano.function(inputs=[self.input, self.truth],
                                         outputs=self.cost)
        self.predict = theano.function(inputs=[self.input], outputs=self.pred)
        # Compute the gradient of the objective function with respect to the model parameters
        self.compute_cost_and_gradient = theano.function(
            inputs=[self.input, self.truth],
            outputs=self.gradparams + [self.cost])
        # Output function for debugging purpose
        self.show_hidden = theano.function(inputs=[self.input, self.truth],
                                           outputs=self.hidden)
        self.show_compressed_hidden = theano.function(
            inputs=[self.input, self.truth], outputs=self.compressed_hidden)
        self.show_output = theano.function(inputs=[self.input, self.truth],
                                           outputs=self.raw_output)
        if verbose:
            logger.debug(
                'Finished building GrCNN architecture, summarized below:')
            logger.debug('Input dimension: %d' % config.num_input)
            logger.debug('Hidden dimension inside GrCNNEncoder pyramid: %d' %
                         config.num_hidden)
            logger.debug('Hidden dimension of MLP: %d' % config.num_mlp)
            logger.debug('Number of target classes: %d' % config.num_class)
            logger.debug('Number of parameters in encoder part: %d' %
                         self.num_params_encoder)
            logger.debug('Number of parameters in classifier part: %d' %
                         self.num_params_classifier)
            logger.debug('Number of total parameters in this model: %d' %
                         self.num_params)

    def train(self, instance, label):
        '''
        @instance: np.ndarray. Two-dimensional matrix corresponding to a time sequence:
        the first dimension indexes time steps and the second indexes the embedding dimension.
        @label: np.ndarray. One-dimensional array of int labels.

        Note: this only evaluates the cost; parameters are updated separately
        via update_params.
        '''
        cost = self.objective(instance, label)
        return cost

    def update_params(self, grads, learn_rate):
        '''
        @grads: [np.ndarray]. List of numpy.ndarray for updating the model parameters.
        @learn_rate: scalar. Learning rate.
        '''
        for param, grad in zip(self.params, grads):
            p = param.get_value(borrow=True)
            param.set_value(p - learn_rate * grad, borrow=True)

    def set_params(self, params):
        '''
        @params: [np.ndarray]. List of numpy.ndarray to set the model parameters.
        '''
        for shared_param, value in zip(self.params, params):
            shared_param.set_value(value, borrow=True)

    @staticmethod
    def save(fname, model):
        '''
        @fname: String. Filename to store the model.
        @model: GrCNN. An instance of GrCNN classifier to be saved.
        '''
        with open(fname, 'wb') as fout:
            cPickle.dump(model, fout)

    @staticmethod
    def load(fname):
        '''
        @fname: String. Filename to load the model.
        '''
        with open(fname, 'rb') as fin:
            model = cPickle.load(fin)
        return model
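Since the objective compiled in __init__ above has no updates, training with this class goes through compute_cost_and_gradient followed by update_params. A minimal sketch of such a driver loop; config, train_sents and train_labels are placeholders for whatever configuration and data pipeline the project uses, and the learning rate and epoch count are arbitrary:

# Hypothetical driver loop; only the GrCNN methods used here
# (compute_cost_and_gradient, update_params, save) come from the class above.
grcnn = GrCNN(config=config, verbose=True)
learn_rate = 0.02
for epoch in xrange(10):
    total_cost = 0.0
    for sent_matrix, label in zip(train_sents, train_labels):
        # sent_matrix: (time, embedding) float matrix; label: int32 array of
        # length 1, matching the ivector truth variable.
        # Outputs are the per-parameter gradients followed by the cost.
        results = grcnn.compute_cost_and_gradient(sent_matrix, label)
        grads, cost = results[:-1], results[-1]
        grcnn.update_params(grads, learn_rate)
        total_cost += cost
    print('epoch %d, total cost = %f' % (epoch, total_cost))
GrCNN.save('grcnn.model', grcnn)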