Example #1

# Imports reconstructed so the example is self-contained; numpy, theano and
# cPickle are standard for this codebase, while GrCNNEncoder, Activation,
# HiddenLayer, LogisticLayer and floatX are assumed to be provided by the
# project's own modules.
import cPickle
import logging

import numpy as np
import theano
import theano.tensor as T

logger = logging.getLogger(__name__)  # assumption: module-level logger

class GrCNNBagger(object):
    '''
    Using the natural hierarchical structure of GrCNN to build a hierarchical
    summarization of a sentence for model bagging or, equivalently, a
    mixture of experts.
    '''
    def __init__(self, config, verbose=True):
        '''
        @config: GRCNNConfiger. Configuration object used to set the architecture of GrCNNEncoder.
        '''
        self.encoder = GrCNNEncoder(config, verbose)
        # Link two parts
        self.input = self.encoder.input
        # Activation function
        self.act = Activation(config.activation)
        # Extract the hierarchical representation, the pyramids, from the encoder
        # Combine the original time series and the compressed time series
        self.pyramids = self.encoder.pyramids
        self.pyramids = T.concatenate([
            self.encoder.hidden0.dimshuffle('x', 0, 1), self.encoder.pyramids
        ])
        self.nsteps = self.pyramids.shape[0]
        # Use another scan function to compress each hierarchical representation
        # into the vector representation
        self.hierarchies, _ = theano.scan(
            fn=self._step_compress,
            sequences=[T.arange(self.nsteps, 0, -1), self.pyramids])
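        # _step_compress averages the first num_valid rows of each pyramid
        # level, so self.hierarchies has shape (nsteps, num_hidden): one
        # mean-pooled summary vector per level.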
        # Global classifier, MLP, mixture of experts
        self.hidden_layer = HiddenLayer(self.hierarchies,
                                        (config.num_hidden, config.num_mlp),
                                        act=Activation(config.hiddenact))
        # Adding dropout support
        self.hidden = self.hidden_layer.output
        srng = T.shared_randomstreams.RandomStreams(config.random_seed)
        mask = srng.binomial(n=1, p=1 - config.dropout, size=self.hidden.shape)
        self.hidden *= T.cast(mask, floatX)
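        # Binary dropout mask: each hidden unit is kept independently with
        # probability 1 - config.dropout and zeroed otherwise.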
        # Connect the hidden layer after dropout to a logistic output layer
        self.output_layer = LogisticLayer(self.hidden, config.num_mlp)
        self.experts = self.output_layer.output
        # Global weighting mechanism, voting weights
        self.weight_layer = theano.shared(
            name='Weighting vector',
            value=np.random.rand(config.num_hidden).astype(floatX))
        self.weights = T.nnet.softmax(
            T.dot(self.hierarchies, self.weight_layer))
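        # Softmax turns the per-level scores into a distribution over the
        # nsteps experts: non-negative voting weights summing to one, i.e.
        # the gating network of the mixture of experts.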
        # Compute the total number of parameters in the model
        self.num_params = self.encoder.num_params + self.hidden_layer.num_params + \
                          self.output_layer.num_params + config.num_hidden
        # Final decision, bagging
        self.score = T.sum(T.flatten(self.experts) * T.flatten(self.weights))
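        # Convex combination: each expert emits a probability and the voting
        # weights sum to one, so the bagged score stays in [0, 1] and 0.5 is
        # a valid decision threshold.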
        # Prediction for classification
        self.pred = self.score >= 0.5
        # Stack all the parameters
        self.params = []
        self.params += self.encoder.params
        self.params += self.hidden_layer.params
        self.params += self.output_layer.params
        self.params += [self.weight_layer]
        # Build objective function for binary classification problem
        self.truth = T.iscalar(name='label')
        self.cost = -self.truth * T.log((self.score+np.finfo(float).eps) / (1+2*np.finfo(float).eps)) - \
                    (1-self.truth) * T.log((1.0-self.score+np.finfo(float).eps) / (1+2*np.finfo(float).eps))
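        # The eps terms keep both log arguments strictly positive when the
        # score saturates at 0 or 1, and dividing by (1 + 2*eps) renormalizes
        # the two smoothed probabilities so they still sum to one.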
        ## Weight Decay
        if config.weight_decay:
            self.regularizer = self.encoder.L2_loss() + self.hidden_layer.L2_loss() + \
                               self.output_layer.L2_loss() + T.sum(self.weight_layer ** 2)
            self.regularizer *= config.weight_decay_parameter
            self.cost += self.regularizer
        # Construct gradient vectors
        self.gradparams = T.grad(self.cost, self.params)
        # Construct gradient for the input matrix, fine-tuning
        self.input_grads = T.grad(self.cost, self.input)
        # Build and compile theano functions
        self.predict = theano.function(inputs=[self.input], outputs=self.pred)
        self.bagging = theano.function(inputs=[self.input], outputs=self.score)
        self.compute_gradient_and_cost = theano.function(
            inputs=[self.input, self.truth],
            outputs=self.gradparams + [self.cost, self.pred])
        self.compute_input_gradient = theano.function(
            inputs=[self.input, self.truth], outputs=self.input_grads)
        # Theano functions for debugging purposes
        self.show_weights = theano.function(inputs=[self.input],
                                            outputs=self.weights)
        self.show_scores = theano.function(inputs=[self.input],
                                           outputs=self.experts)
        self.show_hierarchy = theano.function(inputs=[self.input],
                                              outputs=self.hierarchies)
        self.show_prob = theano.function(inputs=[self.input],
                                         outputs=self.score)
        self.show_cost = theano.function(inputs=[self.input, self.truth],
                                         outputs=self.cost)
        if verbose:
            logger.debug('GrCNNBagger build finished...')
            logger.debug(
                'Hierarchical structure of GrCNN for classification...')
            logger.debug('Total number of parameters in the model: %d' %
                         self.num_params)

    def _step_compress(self, num_valid, level):
        '''
        @num_valid: Int. Number of valid rows at this pyramid level; the first
                    num_valid rows of level are averaged along the first dimension.
        @level: theano symbolic matrix. One level of the pyramid to be compressed.
        '''
        return T.mean(level[:num_valid, :], axis=0)

    def update_params(self, grads, learn_rate):
        '''
        @grads: [np.ndarray]. List of numpy.ndarray for updating the model parameters.
        @learn_rate: scalar. Learning rate.
        '''
        for param, grad in zip(self.params, grads):
            p = param.get_value(borrow=True)
            param.set_value(p - learn_rate * grad, borrow=True)

    def set_params(self, params):
        '''
        @params: [np.ndarray]. List of numpy.ndarray to set the model parameters.
        '''
        for param, value in zip(self.params, params):
            param.set_value(value, borrow=True)

    def deepcopy(self, bagger):
        '''
        @bagger: GrCNNBagger. Copy the model parameters of another GrCNNBagger and use them.
        '''
        assert len(self.params) == len(bagger.params)
        for p, param in zip(self.params, bagger.params):
            val = param.get_value()
            p.set_value(val)

    @staticmethod
    def save(fname, model):
        '''
        @fname: String. File name to store the model.
        @model: An instance of GrCNNBagger to be saved.
        '''
        with open(fname, 'wb') as fout:
            cPickle.dump(model, fout)

    @staticmethod
    def load(fname):
        '''
        @fname: String. File name to load the model.
        '''
        with open(fname, 'rb') as fin:
            model = cPickle.load(fin)
        return model
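
A minimal usage sketch follows, under stated assumptions: GRCNNConfiger is
constructed from a config file path and exposes a num_input attribute for the
embedding dimension, and the model input is a float matrix of word vectors of
shape (sentence_length, embedding_dim). The names grcnn.conf, sentence, and
label below are hypothetical placeholders.

# Hypothetical driver: a one-sentence SGD loop, then inspection of the bagged
# score and the softmax voting weights.
config = GRCNNConfiger('grcnn.conf')  # constructor signature is an assumption
bagger = GrCNNBagger(config, verbose=True)

# Fake word vectors standing in for an embedded 12-token sentence.
sentence = np.random.randn(12, config.num_input).astype(floatX)  # num_input assumed
label = 1

for epoch in xrange(10):
    results = bagger.compute_gradient_and_cost(sentence, label)
    grads, cost, pred = results[:-2], results[-2], results[-1]
    bagger.update_params(grads, learn_rate=0.01)

print bagger.bagging(sentence)       # bagged probability in [0, 1]
print bagger.show_weights(sentence)  # voting weights over the pyramid levels
GrCNNBagger.save('bagger.pkl', bagger)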