import cPickle
import logging

import numpy as np
import theano
import theano.tensor as T

# GrCNNEncoder, Activation, HiddenLayer, LogisticLayer and GRCNNConfiger are
# project-local classes assumed to be importable from the repo's own modules;
# floatX and logger follow the usual Theano/logging conventions.
floatX = theano.config.floatX
logger = logging.getLogger(__name__)


class GrCNNBagger(object):
    '''
    Use the natural hierarchical structure of GrCNN to build a hierarchical
    summarization of a sentence for model bagging or, equivalently, a mixture
    of experts.
    '''
    def __init__(self, config, verbose=True):
        '''
        @config: GRCNNConfiger. Configer used to set the architecture of GrCNNEncoder.
        '''
        self.encoder = GrCNNEncoder(config, verbose)
        # Link the two parts
        self.input = self.encoder.input
        # Activation function
        self.act = Activation(config.activation)
        # Extract the hierarchical representations, the pyramids, from the
        # encoder, combining the original time series with the compressed ones
        self.pyramids = T.concatenate([
            self.encoder.hidden0.dimshuffle('x', 0, 1),
            self.encoder.pyramids])
        self.nsteps = self.pyramids.shape[0]
        # Use another scan to compress each hierarchical representation
        # into a single vector representation
        self.hierarchies, _ = theano.scan(
            fn=self._step_compress,
            sequences=[T.arange(self.nsteps, 0, -1), self.pyramids])
        # Global classifier, MLP, mixture of experts
        self.hidden_layer = HiddenLayer(
            self.hierarchies,
            (config.num_hidden, config.num_mlp),
            act=Activation(config.hiddenact))
        # Dropout on the hidden layer. Note that the mask is sampled on every
        # pass; there is no separate inference path that rescales activations.
        self.hidden = self.hidden_layer.output
        srng = T.shared_randomstreams.RandomStreams(config.random_seed)
        mask = srng.binomial(n=1, p=1 - config.dropout, size=self.hidden.shape)
        self.hidden *= T.cast(mask, floatX)
        # Connect the hidden layer, after dropout, to a logistic output layer
        self.output_layer = LogisticLayer(self.hidden, config.num_mlp)
        self.experts = self.output_layer.output
        # Global weighting mechanism: softmax voting weights over the experts
        self.weight_layer = theano.shared(
            name='Weighting vector',
            value=np.random.rand(config.num_hidden).astype(floatX))
        self.weights = T.nnet.softmax(T.dot(self.hierarchies, self.weight_layer))
        # Total number of parameters in the model
        self.num_params = (self.encoder.num_params + self.hidden_layer.num_params +
                           self.output_layer.num_params + config.num_hidden)
        # Final decision, bagging: a weighted vote over the expert scores
        self.score = T.sum(T.flatten(self.experts) * T.flatten(self.weights))
        # Prediction for binary classification
        self.pred = self.score >= 0.5
        # Stack all the parameters
        self.params = []
        self.params += self.encoder.params
        self.params += self.hidden_layer.params
        self.params += self.output_layer.params
        self.params += [self.weight_layer]
        # Objective function for the binary classification problem: cross-entropy
        # with epsilon smoothing so the logarithms stay finite when the score
        # saturates at exactly 0 or 1
        eps = np.finfo(float).eps
        self.truth = T.iscalar(name='label')
        self.cost = (-self.truth * T.log((self.score + eps) / (1 + 2 * eps)) -
                     (1 - self.truth) * T.log((1.0 - self.score + eps) / (1 + 2 * eps)))
        # Weight decay
        if config.weight_decay:
            self.regularizer = (self.encoder.L2_loss() + self.hidden_layer.L2_loss() +
                                self.output_layer.L2_loss() + T.sum(self.weight_layer ** 2))
            self.regularizer *= config.weight_decay_parameter
            self.cost += self.regularizer
        # Gradients with respect to the model parameters
        self.gradparams = T.grad(self.cost, self.params)
        # Gradient with respect to the input matrix, for fine-tuning
        self.input_grads = T.grad(self.cost, self.input)
        # Build and compile the theano functions
        self.predict = theano.function(inputs=[self.input], outputs=self.pred)
        self.bagging = theano.function(inputs=[self.input], outputs=self.score)
        self.compute_gradient_and_cost = theano.function(
            inputs=[self.input, self.truth],
            outputs=self.gradparams + [self.cost, self.pred])
        self.compute_input_gradient = theano.function(
            inputs=[self.input, self.truth],
            outputs=self.input_grads)
        # Theano functions for debugging purposes
        self.show_weights = theano.function(inputs=[self.input], outputs=self.weights)
        self.show_scores = theano.function(inputs=[self.input], outputs=self.experts)
        self.show_hierarchy = theano.function(inputs=[self.input], outputs=self.hierarchies)
        self.show_prob = theano.function(inputs=[self.input], outputs=self.score)
        self.show_cost = theano.function(inputs=[self.input, self.truth], outputs=self.cost)
        if verbose:
            logger.debug('GrCNNBagger build finished...')
            logger.debug('Hierarchical structure of GrCNN for classification...')
            logger.debug('Total number of parameters in the model: %d' % self.num_params)

    def _step_compress(self, niter, level):
        '''
        @niter: Int. Number of leading rows of the matrix that are valid at
                this level; they are averaged along the first dimension.
        @level: theano symbolic matrix. A time series to be compressed.
        '''
        return T.mean(level[:niter, :], axis=0)

    def update_params(self, grads, learn_rate):
        '''
        @grads: [np.ndarray]. List of numpy.ndarrays used to update the model parameters.
        @learn_rate: scalar. Learning rate.
        '''
        for param, grad in zip(self.params, grads):
            p = param.get_value(borrow=True)
            param.set_value(p - learn_rate * grad, borrow=True)

    def set_params(self, params):
        '''
        @params: [np.ndarray]. List of numpy.ndarrays used to set the model parameters.
        '''
        for p, param in zip(self.params, params):
            p.set_value(param, borrow=True)

    def deepcopy(self, bagger):
        '''
        @bagger: GrCNNBagger. Copy the model parameters of another GrCNNBagger
                 into this instance.
        '''
        assert len(self.params) == len(bagger.params)
        for p, param in zip(self.params, bagger.params):
            val = param.get_value()
            p.set_value(val)

    @staticmethod
    def save(fname, model):
        '''
        @fname: String. File name under which to store the model.
        @model: The instance of GrCNNBagger to be saved.
        '''
        with open(fname, 'wb') as fout:
            cPickle.dump(model, fout)

    @staticmethod
    def load(fname):
        '''
        @fname: String. File name from which to load the model.
        '''
        with open(fname, 'rb') as fin:
            model = cPickle.load(fin)
        return model
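

# A minimal usage sketch, not part of the original module: it runs one SGD
# step on a single toy example and round-trips the model through save/load.
# The 'grcnn.conf' path, the config.num_input attribute, and the random input
# matrix are assumptions standing in for the real configuration and the
# word-embedding pipeline that produces (time, embedding_dim) input matrices.
if __name__ == '__main__':
    config = GRCNNConfiger('grcnn.conf')
    bagger = GrCNNBagger(config, verbose=True)
    # Fake sentence: 10 time steps of config.num_input-dimensional embeddings
    x = np.random.randn(10, config.num_input).astype(floatX)
    label = 1
    # compute_gradient_and_cost returns the parameter gradients followed by
    # the scalar cost and the binary prediction
    results = bagger.compute_gradient_and_cost(x, label)
    grads, cost, pred = results[:-2], results[-2], results[-1]
    logger.debug('cost = %f, prediction = %s' % (cost, pred))
    # One step of plain SGD on all parameters
    bagger.update_params(grads, learn_rate=0.01)
    # Persist and restore the trained model
    GrCNNBagger.save('bagger.pkl', bagger)
    bagger = GrCNNBagger.load('bagger.pkl')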