class MLPRanker(object):
    '''
    Multilayer Perceptron ranking model trained with a pairwise hinge loss.

    Three pairs of matrix inputs are wired into one shared network:
    1, inputL, inputR: used for scoring a pair after training.
    2, inputPL, inputPR: positive (preferred) training pairs.
    3, inputNL, inputNR: negative training pairs.
    '''

    def __init__(self, verbose=True):
        '''
        @verbose: Boolean. Whether to log a summary of the architecture.
        '''
        if verbose:
            logger.debug('Build Multilayer Perceptron Ranking model...')
        # Positive input setting
        self.inputPL = T.matrix(name='inputPL', dtype=floatX)
        self.inputPR = T.matrix(name='inputPR', dtype=floatX)
        # Negative input setting
        self.inputNL = T.matrix(name='inputNL', dtype=floatX)
        self.inputNR = T.matrix(name='inputNR', dtype=floatX)
        # Standard input setting
        self.inputL = T.matrix(name='inputL', dtype=floatX)
        self.inputR = T.matrix(name='inputR', dtype=floatX)
        # Build activation function
        self.act = Activation('tanh')
        # Connect input matrices: each example is the concatenation of its
        # left and right feature vectors along the feature axis.
        self.inputP = T.concatenate([self.inputPL, self.inputPR], axis=1)
        self.inputN = T.concatenate([self.inputNL, self.inputNR], axis=1)
        self.input = T.concatenate([self.inputL, self.inputR], axis=1)
        # Build hidden layer. Input dimension is 2*edim because of the
        # concatenation above.
        self.hidden_layer = HiddenLayer(self.input, (2 * edim, args.hidden),
                                        act=self.act)
        self.hidden = self.hidden_layer.output
        self.hiddenP = self.hidden_layer.encode(self.inputP)
        self.hiddenN = self.hidden_layer.encode(self.inputN)
        # Dropout at test time: deterministically rescale activations by the
        # keep probability instead of sampling a mask.
        self.thidden = (1 - args.dropout) * self.hidden
        self.thiddenP = (1 - args.dropout) * self.hiddenP
        self.thiddenN = (1 - args.dropout) * self.hiddenN
        # Dropout at training time: sample binary keep-masks.
        srng = T.shared_randomstreams.RandomStreams(args.seed)
        mask = srng.binomial(n=1, p=1 - args.dropout, size=self.hidden.shape)
        maskP = srng.binomial(n=1, p=1 - args.dropout, size=self.hiddenP.shape)
        maskN = srng.binomial(n=1, p=1 - args.dropout, size=self.hiddenN.shape)
        self.hidden *= T.cast(mask, floatX)
        self.hiddenP *= T.cast(maskP, floatX)
        self.hiddenN *= T.cast(maskN, floatX)
        # Build linear output layer
        self.score_layer = ScoreLayer(self.hidden, args.hidden)
        self.output = self.score_layer.output
        self.scoreP = self.score_layer.encode(self.hiddenP)
        self.scoreN = self.score_layer.encode(self.hiddenN)
        # Build for test (deterministic, rescaled activations)
        self.toutput = self.score_layer.encode(self.thidden)
        self.tscoreP = self.score_layer.encode(self.thiddenP)
        self.tscoreN = self.score_layer.encode(self.thiddenN)
        # Stack all the parameters
        self.params = []
        self.params += self.hidden_layer.params
        self.params += self.score_layer.params
        # Pairwise hinge loss: mean(max(0, 1 - scoreP + scoreN))
        self.cost = T.mean(
            T.maximum(T.zeros_like(self.scoreP),
                      1.0 - self.scoreP + self.scoreN))
        # Construct the gradient of the cost function with respect to the
        # model parameters
        self.gradparams = T.grad(self.cost, self.params)
        # Count the total number of parameters in this model:
        # hidden layer: (2*edim) x hidden weights + hidden biases;
        # score layer: hidden weights + 1 bias.
        # (Fixed: the original counted edim*hidden weights although the
        # hidden layer input dimension is 2*edim.)
        self.num_params = 2 * edim * args.hidden + args.hidden + args.hidden + 1
        # Build class methods. Scoring uses the deterministic test-time graph.
        self.score = theano.function(inputs=[self.inputL, self.inputR],
                                     outputs=self.toutput)
        self.compute_cost_and_gradient = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=self.gradparams + [self.cost, self.scoreP, self.scoreN])
        self.show_scores = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=[self.tscoreP, self.tscoreN])
        if verbose:
            logger.debug(
                'Architecture of MLP Ranker built finished, summarized below: '
            )
            logger.debug('Input dimension: %d' % edim)
            logger.debug('Hidden dimension: %d' % args.hidden)
            logger.debug('Total number of parameters used in the model: %d' %
                         self.num_params)

    def update_params(self, grads, learn_rate):
        '''
        @grads: [np.ndarray]. Gradients aligned with self.params.
        @learn_rate: scalar. Learning rate for plain SGD updates.
        '''
        for param, grad in zip(self.params, grads):
            p = param.get_value(borrow=True)
            param.set_value(p - learn_rate * grad, borrow=True)

    @staticmethod
    def save(fname, model):
        '''
        @fname: String. Filename to store the model.
        @model: MLPRanker. Instance to be pickled.
        '''
        # open() instead of the deprecated Python 2 builtin file().
        with open(fname, 'wb') as fout:
            cPickle.dump(model, fout)

    @staticmethod
    def load(fname):
        '''
        @fname: String. Filename to load the model from.
        '''
        with open(fname, 'rb') as fin:
            model = cPickle.load(fin)
        return model
class ExtGrCNNMatchScorer(object):
    '''
    Extended Gated Recursive Convolutional Neural Network for matching task.
    The last layer of the model includes a linear layer for regression.
    '''

    def __init__(self, config=None, verbose=True):
        '''
        @config: configuration object providing activation, hiddenact,
                 num_input, num_hidden, num_mlp, num_gates, dropout,
                 random_seed.
        @verbose: Boolean. Whether to log a summary of the architecture.
        '''
        # Construct two GrCNNEncoders for matching two sentences
        self.encoderL = ExtGrCNNEncoder(config, verbose)
        self.encoderR = ExtGrCNNEncoder(config, verbose)
        # Link the parameters of two parts
        self.params = []
        self.params += self.encoderL.params
        self.params += self.encoderR.params
        # Build three kinds of inputs:
        # 1, inputL, inputR. This pair is used for computing the score after training
        # 2, inputPL, inputPR. This part is used for training positive pairs
        # 3, inputNL, inputNR. This part is used for training negative pairs
        self.inputL = self.encoderL.input
        self.inputR = self.encoderR.input
        # Positive
        self.inputPL = T.matrix(name='inputPL', dtype=floatX)
        self.inputPR = T.matrix(name='inputPR', dtype=floatX)
        # Negative
        self.inputNL = T.matrix(name='inputNL', dtype=floatX)
        self.inputNR = T.matrix(name='inputNR', dtype=floatX)
        # Linking input-output mapping
        self.hiddenL = self.encoderL.output
        self.hiddenR = self.encoderR.output
        # Positive
        self.hiddenPL = self.encoderL.encode(self.inputPL)
        self.hiddenPR = self.encoderR.encode(self.inputPR)
        # Negative
        self.hiddenNL = self.encoderL.encode(self.inputNL)
        self.hiddenNR = self.encoderR.encode(self.inputNR)
        # Activation function
        self.act = Activation(config.activation)
        # MLP Component: concatenate the two encodings along the feature axis.
        self.hidden = T.concatenate([self.hiddenL, self.hiddenR], axis=1)
        self.hiddenP = T.concatenate([self.hiddenPL, self.hiddenPR], axis=1)
        self.hiddenN = T.concatenate([self.hiddenNL, self.hiddenNR], axis=1)
        # Build hidden layer; input dimension is 2*num_hidden because of the
        # concatenation above.
        self.hidden_layer = HiddenLayer(
            self.hidden, (2 * config.num_hidden, config.num_mlp),
            act=Activation(config.hiddenact))
        self.compressed_hidden = self.hidden_layer.output
        self.compressed_hiddenP = self.hidden_layer.encode(self.hiddenP)
        self.compressed_hiddenN = self.hidden_layer.encode(self.hiddenN)
        # Accumulate parameters
        self.params += self.hidden_layer.params
        # Dropout parameter: sample binary keep-masks for training.
        srng = T.shared_randomstreams.RandomStreams(config.random_seed)
        mask = srng.binomial(n=1, p=1 - config.dropout,
                             size=self.compressed_hidden.shape)
        maskP = srng.binomial(n=1, p=1 - config.dropout,
                              size=self.compressed_hiddenP.shape)
        maskN = srng.binomial(n=1, p=1 - config.dropout,
                              size=self.compressed_hiddenN.shape)
        self.compressed_hidden *= T.cast(mask, floatX)
        self.compressed_hiddenP *= T.cast(maskP, floatX)
        self.compressed_hiddenN *= T.cast(maskN, floatX)
        # Score layers
        self.score_layer = ScoreLayer(self.compressed_hidden, config.num_mlp)
        self.output = self.score_layer.output
        self.scoreP = self.score_layer.encode(self.compressed_hiddenP)
        self.scoreN = self.score_layer.encode(self.compressed_hiddenN)
        # Accumulate parameters
        self.params += self.score_layer.params
        # Pairwise hinge loss: mean(max(0, 1 - scoreP + scoreN))
        self.cost = T.mean(
            T.maximum(T.zeros_like(self.scoreP),
                      1.0 - self.scoreP + self.scoreN))
        # Construct the gradient of the cost function with respect to the
        # model parameters
        self.gradparams = T.grad(self.cost, self.params)
        # Compute the total number of parameters in the model:
        # classifier = (2*num_hidden) x num_mlp weights + num_mlp biases in
        # the hidden layer, plus num_mlp weights + 1 bias in the score layer.
        self.num_params_encoder = self.encoderL.num_params + self.encoderR.num_params
        self.num_params_classifier = 2 * config.num_hidden * config.num_mlp + \
                                     config.num_mlp + \
                                     config.num_mlp + 1
        self.num_params = self.num_params_encoder + self.num_params_classifier
        # Build class methods.
        # NOTE(review): self.output is built from the dropout-masked hidden,
        # so self.score is stochastic at evaluation time — confirm whether a
        # deterministic (keep-probability rescaled) graph was intended, as in
        # MLPRanker.
        self.score = theano.function(inputs=[self.inputL, self.inputR],
                                     outputs=self.output)
        self.compute_cost_and_gradient = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=self.gradparams + [self.cost, self.scoreP, self.scoreN])
        # Debugging helpers.
        self.show_scores = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=[self.scoreP, self.scoreN])
        self.show_hiddens = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=[self.hiddenP, self.hiddenN])
        self.show_inputs = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR])
        if verbose:
            logger.debug(
                'Architecture of ExtGrCNNMatchScorer built finished, summarized below: '
            )
            logger.debug('Input dimension: %d' % config.num_input)
            logger.debug('Hidden dimension inside GrCNNMatchScorer pyramid: %d'
                         % config.num_hidden)
            logger.debug('Hidden dimension MLP: %d' % config.num_mlp)
            logger.debug('Number of Gating functions: %d' % config.num_gates)
            logger.debug('There are 2 ExtGrCNNEncoders used in model.')
            logger.debug('Total number of parameters used in the model: %d' %
                         self.num_params)

    def update_params(self, grads, learn_rate):
        '''
        @grads: [np.ndarray]. List of numpy.ndarray for updating the model parameters.
        @learn_rate: scalar. Learning rate.
        '''
        for param, grad in zip(self.params, grads):
            p = param.get_value(borrow=True)
            param.set_value(p - learn_rate * grad, borrow=True)

    def set_params(self, params):
        '''
        @params: [np.ndarray]. List of numpy.ndarray to set the model parameters.
        '''
        for p, param in zip(self.params, params):
            p.set_value(param, borrow=True)

    def deepcopy(self, grcnn):
        '''
        @grcnn: GrCNNMatchScorer. Copy the model parameters of another
                GrCNNMatchScorer and use it.
        '''
        assert len(self.params) == len(grcnn.params)
        for p, param in zip(self.params, grcnn.params):
            val = param.get_value()
            p.set_value(val)

    @staticmethod
    def save(fname, model):
        '''
        @fname: String. Filename to store the model.
        @model: GrCNNMatchScorer. An instance of GrCNNMatchScorer to be saved.
        '''
        # open() instead of the deprecated Python 2 builtin file().
        with open(fname, 'wb') as fout:
            cPickle.dump(model, fout)

    @staticmethod
    def load(fname):
        '''
        @fname: String. Filename to load the model.
        '''
        with open(fname, 'rb') as fin:
            model = cPickle.load(fin)
        return model
class BRNNMatchScorer(object):
    '''
    Bidirectional RNN for text matching as a classification problem.
    '''

    def __init__(self, config, verbose=True):
        '''
        @config: configuration object providing activation, hiddenact,
                 num_input, num_hidden, num_mlp, dropout, random_seed.
        @verbose: Boolean. Whether to log a summary of the architecture.
        '''
        # Construct two BRNNEncoders for matching two sentences
        self.encoderL = BRNNEncoder(config, verbose)
        self.encoderR = BRNNEncoder(config, verbose)
        # Link two parts
        self.params = []
        self.params += self.encoderL.params
        self.params += self.encoderR.params
        # Set up input
        # Note that there are three kinds of inputs altogether, including:
        # 1, inputL, inputR. This pair is used for computing the score after training
        # 2, inputPL, inputPR. This pair is used for training positive pairs
        # 3, inputNL, inputNR. This pair is used for training negative pairs
        self.inputL = self.encoderL.input
        self.inputR = self.encoderR.input
        # Positive
        self.inputPL = T.matrix(name='inputPL', dtype=floatX)
        self.inputPR = T.matrix(name='inputPR', dtype=floatX)
        # Negative
        self.inputNL = T.matrix(name='inputNL', dtype=floatX)
        self.inputNR = T.matrix(name='inputNR', dtype=floatX)
        # Get output of two BRNNEncoders
        self.hiddenL = self.encoderL.output
        self.hiddenR = self.encoderR.output
        # Positive Hidden
        self.hiddenPL = self.encoderL.encode(self.inputPL)
        self.hiddenPR = self.encoderR.encode(self.inputPR)
        # Negative Hidden
        self.hiddenNL = self.encoderL.encode(self.inputNL)
        self.hiddenNR = self.encoderR.encode(self.inputNR)
        # Activation function
        self.act = Activation(config.activation)
        # Concatenate the two encodings along axis 0 (vector features).
        self.hidden = T.concatenate([self.hiddenL, self.hiddenR], axis=0)
        self.hiddenP = T.concatenate([self.hiddenPL, self.hiddenPR], axis=0)
        self.hiddenN = T.concatenate([self.hiddenNL, self.hiddenNR], axis=0)
        # Build hidden layer; input dimension is 4*num_hidden — presumably
        # each bidirectional encoder contributes 2*num_hidden features.
        self.hidden_layer = HiddenLayer(
            self.hidden, (4 * config.num_hidden, config.num_mlp),
            act=Activation(config.hiddenact))
        self.compressed_hidden = self.hidden_layer.output
        self.compressed_hiddenP = self.hidden_layer.encode(self.hiddenP)
        self.compressed_hiddenN = self.hidden_layer.encode(self.hiddenN)
        # Accumulate parameters
        self.params += self.hidden_layer.params
        # Dropout parameter: sample binary keep-masks for training.
        srng = T.shared_randomstreams.RandomStreams(config.random_seed)
        mask = srng.binomial(n=1, p=1 - config.dropout,
                             size=self.compressed_hidden.shape)
        maskP = srng.binomial(n=1, p=1 - config.dropout,
                              size=self.compressed_hiddenP.shape)
        maskN = srng.binomial(n=1, p=1 - config.dropout,
                              size=self.compressed_hiddenN.shape)
        self.compressed_hidden *= T.cast(mask, floatX)
        self.compressed_hiddenP *= T.cast(maskP, floatX)
        self.compressed_hiddenN *= T.cast(maskN, floatX)
        # Score layer
        self.score_layer = ScoreLayer(self.compressed_hidden, config.num_mlp)
        self.output = self.score_layer.output
        self.scoreP = self.score_layer.encode(self.compressed_hiddenP)
        self.scoreN = self.score_layer.encode(self.compressed_hiddenN)
        # Accumulate parameters
        self.params += self.score_layer.params
        # Pairwise hinge loss: mean(max(0, 1 - scoreP + scoreN))
        self.cost = T.mean(
            T.maximum(T.zeros_like(self.scoreP),
                      1.0 - self.scoreP + self.scoreN))
        # Gradient of the cost with respect to the model parameters
        self.gradparams = T.grad(self.cost, self.params)
        # Compute the total number of parameters in the model:
        # classifier = (4*num_hidden) x num_mlp weights + num_mlp biases in
        # the hidden layer, plus num_mlp weights + 1 bias in the score layer.
        # (Fixed: the original used 2*num_hidden although the hidden layer
        # input dimension is declared as 4*num_hidden above.)
        self.num_params_encoder = self.encoderL.num_params + self.encoderR.num_params
        self.num_params_classifier = 4 * config.num_hidden * config.num_mlp + \
                                     config.num_mlp + \
                                     config.num_mlp + 1
        self.num_params = self.num_params_encoder + self.num_params_classifier
        # Build class functions
        self.score = theano.function(inputs=[self.inputL, self.inputR],
                                     outputs=self.output)
        # Compute the gradient of the objective function and cost and prediction
        self.compute_cost_and_gradient = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=self.gradparams + [self.cost, self.scoreP, self.scoreN])
        # Output function for debugging purpose
        self.show_scores = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=[self.scoreP, self.scoreN])
        self.show_hiddens = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=[self.hiddenP, self.hiddenN])
        if verbose:
            logger.debug(
                'Architecture of BRNNMatchScorer built finished, summarized below: '
            )
            logger.debug('Input dimension: %d' % config.num_input)
            logger.debug('Hidden dimension of RNN: %d' % config.num_hidden)
            logger.debug('Hidden dimension of MLP: %d' % config.num_mlp)
            logger.debug('There are 2 BRNNEncoders used in the model.')
            logger.debug('Total number of parameters in this model: %d' %
                         self.num_params)

    def update_params(self, grads, learn_rate):
        '''
        @grads: [np.ndarray]. List of numpy.ndarray for updating the model parameters.
                They are the corresponding gradients of model parameters.
        @learn_rate: scalar. Learning rate.
        '''
        for param, grad in zip(self.params, grads):
            p = param.get_value(borrow=True)
            param.set_value(p - learn_rate * grad, borrow=True)

    def set_params(self, params):
        '''
        @params: [np.ndarray]. List of numpy.ndarray to set the model parameters.
        '''
        for p, param in zip(self.params, params):
            p.set_value(param, borrow=True)

    def deepcopy(self, brnn):
        '''
        @brnn: BRNNMatchScorer. Copy the model parameters of another BRNNMatchScorer.
        '''
        assert len(self.params) == len(brnn.params)
        for p, param in zip(self.params, brnn.params):
            val = param.get_value()
            p.set_value(val)

    @staticmethod
    def save(fname, model):
        '''
        @fname: String. Filename to store the model.
        @model: BRNNMatcher. An instance of BRNNMatcher to be saved.
        '''
        # open() instead of the deprecated Python 2 builtin file().
        with open(fname, 'wb') as fout:
            cPickle.dump(model, fout)

    @staticmethod
    def load(fname):
        '''
        @fname: String. Filename to load the model.
        '''
        with open(fname, 'rb') as fin:
            model = cPickle.load(fin)
        return model
class BRNNMatchScorer(object):
    '''
    Bidirectional RNN for text matching as a classification problem.

    NOTE(review): this is a duplicate definition of BRNNMatchScorer — it
    shadows the earlier one in this file; consider removing one copy.
    '''

    def __init__(self, config, verbose=True):
        '''
        @config: configuration object providing activation, hiddenact,
                 num_input, num_hidden, num_mlp, dropout, random_seed.
        @verbose: Boolean. Whether to log a summary of the architecture.
        '''
        # Construct two BRNNEncoders for matching two sentences
        self.encoderL = BRNNEncoder(config, verbose)
        self.encoderR = BRNNEncoder(config, verbose)
        # Link two parts
        self.params = []
        self.params += self.encoderL.params
        self.params += self.encoderR.params
        # Set up input
        # Note that there are three kinds of inputs altogether, including:
        # 1, inputL, inputR. This pair is used for computing the score after training
        # 2, inputPL, inputPR. This pair is used for training positive pairs
        # 3, inputNL, inputNR. This pair is used for training negative pairs
        self.inputL = self.encoderL.input
        self.inputR = self.encoderR.input
        # Positive
        self.inputPL = T.matrix(name='inputPL', dtype=floatX)
        self.inputPR = T.matrix(name='inputPR', dtype=floatX)
        # Negative
        self.inputNL = T.matrix(name='inputNL', dtype=floatX)
        self.inputNR = T.matrix(name='inputNR', dtype=floatX)
        # Get output of two BRNNEncoders
        self.hiddenL = self.encoderL.output
        self.hiddenR = self.encoderR.output
        # Positive Hidden
        self.hiddenPL = self.encoderL.encode(self.inputPL)
        self.hiddenPR = self.encoderR.encode(self.inputPR)
        # Negative Hidden
        self.hiddenNL = self.encoderL.encode(self.inputNL)
        self.hiddenNR = self.encoderR.encode(self.inputNR)
        # Activation function
        self.act = Activation(config.activation)
        # Concatenate the two encodings along axis 0 (vector features).
        self.hidden = T.concatenate([self.hiddenL, self.hiddenR], axis=0)
        self.hiddenP = T.concatenate([self.hiddenPL, self.hiddenPR], axis=0)
        self.hiddenN = T.concatenate([self.hiddenNL, self.hiddenNR], axis=0)
        # Build hidden layer; input dimension is 4*num_hidden — presumably
        # each bidirectional encoder contributes 2*num_hidden features.
        self.hidden_layer = HiddenLayer(
            self.hidden, (4 * config.num_hidden, config.num_mlp),
            act=Activation(config.hiddenact))
        self.compressed_hidden = self.hidden_layer.output
        self.compressed_hiddenP = self.hidden_layer.encode(self.hiddenP)
        self.compressed_hiddenN = self.hidden_layer.encode(self.hiddenN)
        # Accumulate parameters
        self.params += self.hidden_layer.params
        # Dropout parameter: sample binary keep-masks for training.
        srng = T.shared_randomstreams.RandomStreams(config.random_seed)
        mask = srng.binomial(n=1, p=1 - config.dropout,
                             size=self.compressed_hidden.shape)
        maskP = srng.binomial(n=1, p=1 - config.dropout,
                              size=self.compressed_hiddenP.shape)
        maskN = srng.binomial(n=1, p=1 - config.dropout,
                              size=self.compressed_hiddenN.shape)
        self.compressed_hidden *= T.cast(mask, floatX)
        self.compressed_hiddenP *= T.cast(maskP, floatX)
        self.compressed_hiddenN *= T.cast(maskN, floatX)
        # Score layer
        self.score_layer = ScoreLayer(self.compressed_hidden, config.num_mlp)
        self.output = self.score_layer.output
        self.scoreP = self.score_layer.encode(self.compressed_hiddenP)
        self.scoreN = self.score_layer.encode(self.compressed_hiddenN)
        # Accumulate parameters
        self.params += self.score_layer.params
        # Pairwise hinge loss: mean(max(0, 1 - scoreP + scoreN))
        self.cost = T.mean(
            T.maximum(T.zeros_like(self.scoreP),
                      1.0 - self.scoreP + self.scoreN))
        # Gradient of the cost with respect to the model parameters
        self.gradparams = T.grad(self.cost, self.params)
        # Compute the total number of parameters in the model:
        # classifier = (4*num_hidden) x num_mlp weights + num_mlp biases in
        # the hidden layer, plus num_mlp weights + 1 bias in the score layer.
        # (Fixed: the original used 2*num_hidden although the hidden layer
        # input dimension is declared as 4*num_hidden above.)
        self.num_params_encoder = self.encoderL.num_params + self.encoderR.num_params
        self.num_params_classifier = 4 * config.num_hidden * config.num_mlp + \
                                     config.num_mlp + \
                                     config.num_mlp + 1
        self.num_params = self.num_params_encoder + self.num_params_classifier
        # Build class functions
        self.score = theano.function(inputs=[self.inputL, self.inputR],
                                     outputs=self.output)
        # Compute the gradient of the objective function and cost and prediction
        self.compute_cost_and_gradient = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=self.gradparams + [self.cost, self.scoreP, self.scoreN])
        # Output function for debugging purpose
        self.show_scores = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=[self.scoreP, self.scoreN])
        self.show_hiddens = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=[self.hiddenP, self.hiddenN])
        if verbose:
            logger.debug(
                'Architecture of BRNNMatchScorer built finished, summarized below: '
            )
            logger.debug('Input dimension: %d' % config.num_input)
            logger.debug('Hidden dimension of RNN: %d' % config.num_hidden)
            logger.debug('Hidden dimension of MLP: %d' % config.num_mlp)
            logger.debug('There are 2 BRNNEncoders used in the model.')
            logger.debug('Total number of parameters in this model: %d' %
                         self.num_params)

    def update_params(self, grads, learn_rate):
        '''
        @grads: [np.ndarray]. List of numpy.ndarray for updating the model parameters.
                They are the corresponding gradients of model parameters.
        @learn_rate: scalar. Learning rate.
        '''
        for param, grad in zip(self.params, grads):
            p = param.get_value(borrow=True)
            param.set_value(p - learn_rate * grad, borrow=True)

    def set_params(self, params):
        '''
        @params: [np.ndarray]. List of numpy.ndarray to set the model parameters.
        '''
        for p, param in zip(self.params, params):
            p.set_value(param, borrow=True)

    def deepcopy(self, brnn):
        '''
        @brnn: BRNNMatchScorer. Copy the model parameters of another BRNNMatchScorer.
        '''
        assert len(self.params) == len(brnn.params)
        for p, param in zip(self.params, brnn.params):
            val = param.get_value()
            p.set_value(val)

    @staticmethod
    def save(fname, model):
        '''
        @fname: String. Filename to store the model.
        @model: BRNNMatcher. An instance of BRNNMatcher to be saved.
        '''
        # open() instead of the deprecated Python 2 builtin file().
        with open(fname, 'wb') as fout:
            cPickle.dump(model, fout)

    @staticmethod
    def load(fname):
        '''
        @fname: String. Filename to load the model.
        '''
        with open(fname, 'rb') as fin:
            model = cPickle.load(fin)
        return model
class MLPRanker(object):
    '''
    Multilayer Perceptron ranking model trained with a pairwise hinge loss.

    Three pairs of matrix inputs are wired into one shared network:
    1, inputL, inputR: used for scoring a pair after training.
    2, inputPL, inputPR: positive (preferred) training pairs.
    3, inputNL, inputNR: negative training pairs.

    NOTE(review): this is a duplicate definition of MLPRanker — it shadows
    the earlier one in this file; consider removing one copy.
    '''

    def __init__(self, verbose=True):
        '''
        @verbose: Boolean. Whether to log a summary of the architecture.
        '''
        if verbose:
            logger.debug('Build Multilayer Perceptron Ranking model...')
        # Positive input setting
        self.inputPL = T.matrix(name='inputPL', dtype=floatX)
        self.inputPR = T.matrix(name='inputPR', dtype=floatX)
        # Negative input setting
        self.inputNL = T.matrix(name='inputNL', dtype=floatX)
        self.inputNR = T.matrix(name='inputNR', dtype=floatX)
        # Standard input setting
        self.inputL = T.matrix(name='inputL', dtype=floatX)
        self.inputR = T.matrix(name='inputR', dtype=floatX)
        # Build activation function
        self.act = Activation('tanh')
        # Connect input matrices: each example is the concatenation of its
        # left and right feature vectors along the feature axis.
        self.inputP = T.concatenate([self.inputPL, self.inputPR], axis=1)
        self.inputN = T.concatenate([self.inputNL, self.inputNR], axis=1)
        self.input = T.concatenate([self.inputL, self.inputR], axis=1)
        # Build hidden layer. Input dimension is 2*edim because of the
        # concatenation above.
        self.hidden_layer = HiddenLayer(self.input, (2 * edim, args.hidden),
                                        act=self.act)
        self.hidden = self.hidden_layer.output
        self.hiddenP = self.hidden_layer.encode(self.inputP)
        self.hiddenN = self.hidden_layer.encode(self.inputN)
        # Dropout at test time: deterministically rescale activations by the
        # keep probability instead of sampling a mask.
        self.thidden = (1 - args.dropout) * self.hidden
        self.thiddenP = (1 - args.dropout) * self.hiddenP
        self.thiddenN = (1 - args.dropout) * self.hiddenN
        # Dropout at training time: sample binary keep-masks.
        srng = T.shared_randomstreams.RandomStreams(args.seed)
        mask = srng.binomial(n=1, p=1 - args.dropout, size=self.hidden.shape)
        maskP = srng.binomial(n=1, p=1 - args.dropout, size=self.hiddenP.shape)
        maskN = srng.binomial(n=1, p=1 - args.dropout, size=self.hiddenN.shape)
        self.hidden *= T.cast(mask, floatX)
        self.hiddenP *= T.cast(maskP, floatX)
        self.hiddenN *= T.cast(maskN, floatX)
        # Build linear output layer
        self.score_layer = ScoreLayer(self.hidden, args.hidden)
        self.output = self.score_layer.output
        self.scoreP = self.score_layer.encode(self.hiddenP)
        self.scoreN = self.score_layer.encode(self.hiddenN)
        # Build for test (deterministic, rescaled activations)
        self.toutput = self.score_layer.encode(self.thidden)
        self.tscoreP = self.score_layer.encode(self.thiddenP)
        self.tscoreN = self.score_layer.encode(self.thiddenN)
        # Stack all the parameters
        self.params = []
        self.params += self.hidden_layer.params
        self.params += self.score_layer.params
        # Pairwise hinge loss: mean(max(0, 1 - scoreP + scoreN))
        self.cost = T.mean(
            T.maximum(T.zeros_like(self.scoreP),
                      1.0 - self.scoreP + self.scoreN))
        # Construct the gradient of the cost function with respect to the
        # model parameters
        self.gradparams = T.grad(self.cost, self.params)
        # Count the total number of parameters in this model:
        # hidden layer: (2*edim) x hidden weights + hidden biases;
        # score layer: hidden weights + 1 bias.
        # (Fixed: the original counted edim*hidden weights although the
        # hidden layer input dimension is 2*edim.)
        self.num_params = 2 * edim * args.hidden + args.hidden + args.hidden + 1
        # Build class methods. Scoring uses the deterministic test-time graph.
        self.score = theano.function(inputs=[self.inputL, self.inputR],
                                     outputs=self.toutput)
        self.compute_cost_and_gradient = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=self.gradparams + [self.cost, self.scoreP, self.scoreN])
        self.show_scores = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=[self.tscoreP, self.tscoreN])
        if verbose:
            logger.debug(
                'Architecture of MLP Ranker built finished, summarized below: '
            )
            logger.debug('Input dimension: %d' % edim)
            logger.debug('Hidden dimension: %d' % args.hidden)
            logger.debug('Total number of parameters used in the model: %d' %
                         self.num_params)

    def update_params(self, grads, learn_rate):
        '''
        @grads: [np.ndarray]. Gradients aligned with self.params.
        @learn_rate: scalar. Learning rate for plain SGD updates.
        '''
        for param, grad in zip(self.params, grads):
            p = param.get_value(borrow=True)
            param.set_value(p - learn_rate * grad, borrow=True)

    @staticmethod
    def save(fname, model):
        '''
        @fname: String. Filename to store the model.
        @model: MLPRanker. Instance to be pickled.
        '''
        # open() instead of the deprecated Python 2 builtin file().
        with open(fname, 'wb') as fout:
            cPickle.dump(model, fout)

    @staticmethod
    def load(fname):
        '''
        @fname: String. Filename to load the model from.
        '''
        with open(fname, 'rb') as fin:
            model = cPickle.load(fin)
        return model
class ExtGrCNNMatchScorer(object):
    '''
    Extended Gated Recursive Convolutional Neural Network for matching task.
    The last layer of the model includes a linear layer for regression.

    NOTE(review): this is a duplicate definition of ExtGrCNNMatchScorer — it
    shadows the earlier one in this file; consider removing one copy.
    '''

    def __init__(self, config=None, verbose=True):
        '''
        @config: configuration object providing activation, hiddenact,
                 num_input, num_hidden, num_mlp, num_gates, dropout,
                 random_seed.
        @verbose: Boolean. Whether to log a summary of the architecture.
        '''
        # Construct two GrCNNEncoders for matching two sentences
        self.encoderL = ExtGrCNNEncoder(config, verbose)
        self.encoderR = ExtGrCNNEncoder(config, verbose)
        # Link the parameters of two parts
        self.params = []
        self.params += self.encoderL.params
        self.params += self.encoderR.params
        # Build three kinds of inputs:
        # 1, inputL, inputR. This pair is used for computing the score after training
        # 2, inputPL, inputPR. This part is used for training positive pairs
        # 3, inputNL, inputNR. This part is used for training negative pairs
        self.inputL = self.encoderL.input
        self.inputR = self.encoderR.input
        # Positive
        self.inputPL = T.matrix(name='inputPL', dtype=floatX)
        self.inputPR = T.matrix(name='inputPR', dtype=floatX)
        # Negative
        self.inputNL = T.matrix(name='inputNL', dtype=floatX)
        self.inputNR = T.matrix(name='inputNR', dtype=floatX)
        # Linking input-output mapping
        self.hiddenL = self.encoderL.output
        self.hiddenR = self.encoderR.output
        # Positive
        self.hiddenPL = self.encoderL.encode(self.inputPL)
        self.hiddenPR = self.encoderR.encode(self.inputPR)
        # Negative
        self.hiddenNL = self.encoderL.encode(self.inputNL)
        self.hiddenNR = self.encoderR.encode(self.inputNR)
        # Activation function
        self.act = Activation(config.activation)
        # MLP Component: concatenate the two encodings along the feature axis.
        self.hidden = T.concatenate([self.hiddenL, self.hiddenR], axis=1)
        self.hiddenP = T.concatenate([self.hiddenPL, self.hiddenPR], axis=1)
        self.hiddenN = T.concatenate([self.hiddenNL, self.hiddenNR], axis=1)
        # Build hidden layer; input dimension is 2*num_hidden because of the
        # concatenation above.
        self.hidden_layer = HiddenLayer(
            self.hidden, (2 * config.num_hidden, config.num_mlp),
            act=Activation(config.hiddenact))
        self.compressed_hidden = self.hidden_layer.output
        self.compressed_hiddenP = self.hidden_layer.encode(self.hiddenP)
        self.compressed_hiddenN = self.hidden_layer.encode(self.hiddenN)
        # Accumulate parameters
        self.params += self.hidden_layer.params
        # Dropout parameter: sample binary keep-masks for training.
        srng = T.shared_randomstreams.RandomStreams(config.random_seed)
        mask = srng.binomial(n=1, p=1 - config.dropout,
                             size=self.compressed_hidden.shape)
        maskP = srng.binomial(n=1, p=1 - config.dropout,
                              size=self.compressed_hiddenP.shape)
        maskN = srng.binomial(n=1, p=1 - config.dropout,
                              size=self.compressed_hiddenN.shape)
        self.compressed_hidden *= T.cast(mask, floatX)
        self.compressed_hiddenP *= T.cast(maskP, floatX)
        self.compressed_hiddenN *= T.cast(maskN, floatX)
        # Score layers
        self.score_layer = ScoreLayer(self.compressed_hidden, config.num_mlp)
        self.output = self.score_layer.output
        self.scoreP = self.score_layer.encode(self.compressed_hiddenP)
        self.scoreN = self.score_layer.encode(self.compressed_hiddenN)
        # Accumulate parameters
        self.params += self.score_layer.params
        # Pairwise hinge loss: mean(max(0, 1 - scoreP + scoreN))
        self.cost = T.mean(
            T.maximum(T.zeros_like(self.scoreP),
                      1.0 - self.scoreP + self.scoreN))
        # Construct the gradient of the cost function with respect to the
        # model parameters
        self.gradparams = T.grad(self.cost, self.params)
        # Compute the total number of parameters in the model:
        # classifier = (2*num_hidden) x num_mlp weights + num_mlp biases in
        # the hidden layer, plus num_mlp weights + 1 bias in the score layer.
        self.num_params_encoder = self.encoderL.num_params + self.encoderR.num_params
        self.num_params_classifier = 2 * config.num_hidden * config.num_mlp + \
                                     config.num_mlp + \
                                     config.num_mlp + 1
        self.num_params = self.num_params_encoder + self.num_params_classifier
        # Build class methods.
        # NOTE(review): self.output is built from the dropout-masked hidden,
        # so self.score is stochastic at evaluation time — confirm whether a
        # deterministic (keep-probability rescaled) graph was intended, as in
        # MLPRanker.
        self.score = theano.function(inputs=[self.inputL, self.inputR],
                                     outputs=self.output)
        self.compute_cost_and_gradient = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=self.gradparams + [self.cost, self.scoreP, self.scoreN])
        # Debugging helpers.
        self.show_scores = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=[self.scoreP, self.scoreN])
        self.show_hiddens = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=[self.hiddenP, self.hiddenN])
        self.show_inputs = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR])
        if verbose:
            logger.debug(
                'Architecture of ExtGrCNNMatchScorer built finished, summarized below: '
            )
            logger.debug('Input dimension: %d' % config.num_input)
            logger.debug(
                'Hidden dimension inside GrCNNMatchScorer pyramid: %d' %
                config.num_hidden)
            logger.debug('Hidden dimension MLP: %d' % config.num_mlp)
            logger.debug('Number of Gating functions: %d' % config.num_gates)
            logger.debug('There are 2 ExtGrCNNEncoders used in model.')
            logger.debug('Total number of parameters used in the model: %d' %
                         self.num_params)

    def update_params(self, grads, learn_rate):
        '''
        @grads: [np.ndarray]. List of numpy.ndarray for updating the model parameters.
        @learn_rate: scalar. Learning rate.
        '''
        for param, grad in zip(self.params, grads):
            p = param.get_value(borrow=True)
            param.set_value(p - learn_rate * grad, borrow=True)

    def set_params(self, params):
        '''
        @params: [np.ndarray]. List of numpy.ndarray to set the model parameters.
        '''
        for p, param in zip(self.params, params):
            p.set_value(param, borrow=True)

    def deepcopy(self, grcnn):
        '''
        @grcnn: GrCNNMatchScorer. Copy the model parameters of another
                GrCNNMatchScorer and use it.
        '''
        assert len(self.params) == len(grcnn.params)
        for p, param in zip(self.params, grcnn.params):
            val = param.get_value()
            p.set_value(val)

    @staticmethod
    def save(fname, model):
        '''
        @fname: String. Filename to store the model.
        @model: GrCNNMatchScorer. An instance of GrCNNMatchScorer to be saved.
        '''
        # open() instead of the deprecated Python 2 builtin file().
        with open(fname, 'wb') as fout:
            cPickle.dump(model, fout)

    @staticmethod
    def load(fname):
        '''
        @fname: String. Filename to load the model.
        '''
        with open(fname, 'rb') as fin:
            model = cPickle.load(fin)
        return model