class MLPRanker(object):
    def __init__(self, verbose=True):
        if verbose:
            logger.debug('Build Multilayer Perceptron Ranking model...')
        # Positive input setting
        self.inputPL = T.matrix(name='inputPL', dtype=floatX)
        self.inputPR = T.matrix(name='inputPR', dtype=floatX)
        # Negative input setting
        self.inputNL = T.matrix(name='inputNL', dtype=floatX)
        self.inputNR = T.matrix(name='inputNR', dtype=floatX)
        # Standard input setting
        self.inputL = T.matrix(name='inputL', dtype=floatX)
        self.inputR = T.matrix(name='inputR', dtype=floatX)
        # Build activation function
        self.act = Activation('tanh')
        # Concatenate the left and right input matrices
        self.inputP = T.concatenate([self.inputPL, self.inputPR], axis=1)
        self.inputN = T.concatenate([self.inputNL, self.inputNR], axis=1)
        self.input = T.concatenate([self.inputL, self.inputR], axis=1)
        # Build hidden layer
        self.hidden_layer = HiddenLayer(self.input, (2 * edim, args.hidden),
                                        act=self.act)
        self.hidden = self.hidden_layer.output
        self.hiddenP = self.hidden_layer.encode(self.inputP)
        self.hiddenN = self.hidden_layer.encode(self.inputN)
        # Dropout parameter - test
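        # At test time, dropout is approximated by scaling activations by the
        # keep probability (1 - dropout), the expected value of the training mask.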
        self.thidden = (1 - args.dropout) * self.hidden
        self.thiddenP = (1 - args.dropout) * self.hiddenP
        self.thiddenN = (1 - args.dropout) * self.hiddenN
        # Dropout parameter - train
        srng = T.shared_randomstreams.RandomStreams(args.seed)
        mask = srng.binomial(n=1, p=1 - args.dropout, size=self.hidden.shape)
        maskP = srng.binomial(n=1, p=1 - args.dropout, size=self.hiddenP.shape)
        maskN = srng.binomial(n=1, p=1 - args.dropout, size=self.hiddenN.shape)
        self.hidden *= T.cast(mask, floatX)
        self.hiddenP *= T.cast(maskP, floatX)
        self.hiddenN *= T.cast(maskN, floatX)
        # Build linear output layer
        self.score_layer = ScoreLayer(self.hidden, args.hidden)
        self.output = self.score_layer.output
        self.scoreP = self.score_layer.encode(self.hiddenP)
        self.scoreN = self.score_layer.encode(self.hiddenN)
        # Build test-time outputs (from the scaled activations)
        self.toutput = self.score_layer.encode(self.thidden)
        self.tscoreP = self.score_layer.encode(self.thiddenP)
        self.tscoreN = self.score_layer.encode(self.thiddenN)
        # Stack all the parameters
        self.params = []
        self.params += self.hidden_layer.params
        self.params += self.score_layer.params
        # Build cost function
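        # Pairwise margin ranking (hinge) loss: mean(max(0, 1 - scoreP + scoreN)).
        # It pushes each positive score above its negative score by a margin of 1.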
        self.cost = T.mean(
            T.maximum(T.zeros_like(self.scoreP),
                      1.0 - self.scoreP + self.scoreN))
        # Construct the gradient of the cost function with respect to the model parameters
        self.gradparams = T.grad(self.cost, self.params)
        # Count the total number of parameters in this model:
        # hidden weights (2*edim x hidden) + hidden bias + score weights + score bias
        self.num_params = 2 * edim * args.hidden + args.hidden + args.hidden + 1
        # Build class methods
        self.score = theano.function(inputs=[self.inputL, self.inputR],
                                     outputs=self.toutput)
        self.compute_cost_and_gradient = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=self.gradparams + [self.cost, self.scoreP, self.scoreN])
        self.show_scores = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=[self.tscoreP, self.tscoreN])
        if verbose:
            logger.debug(
                'Finished building the MLP Ranker architecture; summary below:'
            )
            logger.debug('Input dimension: %d' % edim)
            logger.debug('Hidden dimension: %d' % args.hidden)
            logger.debug('Total number of parameters used in the model: %d' %
                         self.num_params)

    def update_params(self, grads, learn_rate):
        for param, grad in zip(self.params, grads):
            p = param.get_value(borrow=True)
            param.set_value(p - learn_rate * grad, borrow=True)

    @staticmethod
    def save(fname, model):
        with open(fname, 'wb') as fout:
            cPickle.dump(model, fout)

    @staticmethod
    def load(fname):
        with open(fname, 'rb') as fin:
            model = cPickle.load(fin)
        return model
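A minimal training-step sketch for the ranker above, assuming hypothetical random
numpy feature matrices of width edim (the batch size of 32 and the 0.01 learning
rate are also assumptions); the output split mirrors how compute_cost_and_gradient
is defined in the class.

import numpy as np

ranker = MLPRanker(verbose=False)
# Hypothetical mini-batch of 32 positive and 32 negative feature pairs.
pairPL = np.random.randn(32, edim).astype(floatX)
pairPR = np.random.randn(32, edim).astype(floatX)
pairNL = np.random.randn(32, edim).astype(floatX)
pairNR = np.random.randn(32, edim).astype(floatX)
# Outputs are gradparams + [cost, scoreP, scoreN]; split at len(params).
outs = ranker.compute_cost_and_gradient(pairPL, pairPR, pairNL, pairNR)
grads, (cost, scoreP, scoreN) = outs[:len(ranker.params)], outs[len(ranker.params):]
# One plain SGD step.
ranker.update_params(grads, learn_rate=0.01)
# Test-time scores for a pair of inputs.
print(ranker.score(pairPL, pairPR))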
Example #2
class ExtGrCNNMatchScorer(object):
    '''
    Extended Gated Recursive Convolutional Neural Network for the matching task.
    The last layer of the model is a linear layer used for regression.
    '''
    def __init__(self, config=None, verbose=True):
        # Construct two GrCNNEncoders for matching two sentences
        self.encoderL = ExtGrCNNEncoder(config, verbose)
        self.encoderR = ExtGrCNNEncoder(config, verbose)
        # Link the parameters of two parts
        self.params = []
        self.params += self.encoderL.params
        self.params += self.encoderR.params
        # Build three kinds of inputs:
        # 1. inputL, inputR. This pair is used for computing the score after training.
        # 2. inputPL, inputPR. This pair is used for training on positive pairs.
        # 3. inputNL, inputNR. This pair is used for training on negative pairs.
        self.inputL = self.encoderL.input
        self.inputR = self.encoderR.input
        # Positive
        self.inputPL = T.matrix(name='inputPL', dtype=floatX)
        self.inputPR = T.matrix(name='inputPR', dtype=floatX)
        # Negative
        self.inputNL = T.matrix(name='inputNL', dtype=floatX)
        self.inputNR = T.matrix(name='inputNR', dtype=floatX)
        # Linking input-output mapping
        self.hiddenL = self.encoderL.output
        self.hiddenR = self.encoderR.output
        # Positive 
        self.hiddenPL = self.encoderL.encode(self.inputPL)
        self.hiddenPR = self.encoderR.encode(self.inputPR)
        # Negative
        self.hiddenNL = self.encoderL.encode(self.inputNL)
        self.hiddenNR = self.encoderR.encode(self.inputNR)
        # Activation function
        self.act = Activation(config.activation)
        # MLP Component
        self.hidden = T.concatenate([self.hiddenL, self.hiddenR], axis=1)
        self.hiddenP = T.concatenate([self.hiddenPL, self.hiddenPR], axis=1)
        self.hiddenN = T.concatenate([self.hiddenNL, self.hiddenNR], axis=1)
        # Build hidden layer
        self.hidden_layer = HiddenLayer(self.hidden,
                                        (2 * config.num_hidden, config.num_mlp),
                                        act=Activation(config.hiddenact))
        self.compressed_hidden = self.hidden_layer.output
        self.compressed_hiddenP = self.hidden_layer.encode(self.hiddenP)
        self.compressed_hiddenN = self.hidden_layer.encode(self.hiddenN)
        # Accumulate parameters
        self.params += self.hidden_layer.params
        # Dropout parameter
        srng = T.shared_randomstreams.RandomStreams(config.random_seed)
        mask = srng.binomial(n=1, p=1-config.dropout, size=self.compressed_hidden.shape)
        maskP = srng.binomial(n=1, p=1-config.dropout, size=self.compressed_hiddenP.shape)
        maskN = srng.binomial(n=1, p=1-config.dropout, size=self.compressed_hiddenN.shape)
        self.compressed_hidden *= T.cast(mask, floatX)
        self.compressed_hiddenP *= T.cast(maskP, floatX)
        self.compressed_hiddenN *= T.cast(maskN, floatX)
        # Score layers
        self.score_layer = ScoreLayer(self.compressed_hidden, config.num_mlp)
        self.output = self.score_layer.output
        self.scoreP = self.score_layer.encode(self.compressed_hiddenP)
        self.scoreN = self.score_layer.encode(self.compressed_hiddenN)
        # Accumulate parameters
        self.params += self.score_layer.params
        # Build cost function
        self.cost = T.mean(T.maximum(T.zeros_like(self.scoreP), 1.0 - self.scoreP + self.scoreN))
        # Construct the gradient of the cost function with respect to the model parameters
        self.gradparams = T.grad(self.cost, self.params)
        # Compute the total number of parameters in the model
        self.num_params_encoder = self.encoderL.num_params + self.encoderR.num_params
        self.num_params_classifier = 2 * config.num_hidden * config.num_mlp + \
                                     config.num_mlp + \
                                     config.num_mlp + 1
        self.num_params = self.num_params_encoder + self.num_params_classifier
        # Build class methods
        self.score = theano.function(inputs=[self.inputL, self.inputR], outputs=self.output)
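        # Note: unlike MLPRanker's test path, this score function is compiled from
        # the dropout-masked graph, so repeated calls give stochastic scores.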
        self.compute_cost_and_gradient = theano.function(inputs=[self.inputPL, self.inputPR, 
                                                                 self.inputNL, self.inputNR],
                                                         outputs=self.gradparams+[self.cost, self.scoreP, self.scoreN])
        self.show_scores = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR], 
                                           outputs=[self.scoreP, self.scoreN])
        self.show_hiddens = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
                                            outputs=[self.hiddenP, self.hiddenN])
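        # show_inputs is an identity mapping, handy for verifying what the
        # compiled graph actually receives.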
        self.show_inputs = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
                                           outputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR])

        if verbose:
            logger.debug('Finished building the ExtGrCNNMatchScorer architecture; summary below:')
            logger.debug('Input dimension: %d' % config.num_input)
            logger.debug('Hidden dimension inside the ExtGrCNNMatchScorer pyramid: %d' % config.num_hidden)
            logger.debug('Hidden dimension of MLP: %d' % config.num_mlp)
            logger.debug('Number of gating functions: %d' % config.num_gates)
            logger.debug('There are 2 ExtGrCNNEncoders used in the model.')
            logger.debug('Total number of parameters used in the model: %d' % self.num_params)

    def update_params(self, grads, learn_rate): 
        '''
        @grads: [np.ndarray]. List of numpy.ndarray for updating the model parameters.
        @learn_rate: scalar. Learning rate.
        '''
        for param, grad in zip(self.params, grads):
            p = param.get_value(borrow=True)
            param.set_value(p - learn_rate * grad, borrow=True)

    def set_params(self, params):
        '''
        @params: [np.ndarray]. List of numpy.ndarray to set the model parameters.
        '''
        for shared, value in zip(self.params, params):
            shared.set_value(value, borrow=True)

    def deepcopy(self, grcnn):
        '''
        @grcnn: ExtGrCNNMatchScorer. Copy the model parameters of another
                ExtGrCNNMatchScorer.
        '''
        assert len(self.params) == len(grcnn.params)
        for shared, param in zip(self.params, grcnn.params):
            shared.set_value(param.get_value())

    @staticmethod
    def save(fname, model):
        '''
        @fname: String. Filename to store the model.
        @model: ExtGrCNNMatchScorer. An instance of ExtGrCNNMatchScorer to be saved.
        '''
        with open(fname, 'wb') as fout:
            cPickle.dump(model, fout)

    @staticmethod
    def load(fname):
        '''
        @fname: String. Filename to load the model.
        '''
        with open(fname, 'rb') as fin:
            model = cPickle.load(fin)
        return model
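A hedged sketch of a best-model snapshot built on the deepcopy method above; the
run_one_epoch routine, the epoch count, the filename, and the config object are
assumptions for illustration, not part of the original code.

# Keep a frozen copy of the best parameters seen so far (hypothetical sketch).
model = ExtGrCNNMatchScorer(config, verbose=False)
best = ExtGrCNNMatchScorer(config, verbose=False)
best_cost = float('inf')
for epoch in range(10):
    cost = run_one_epoch(model)       # hypothetical training routine
    if cost < best_cost:
        best_cost = cost
        best.deepcopy(model)          # snapshot current parameters
model.deepcopy(best)                  # roll back to the best snapshot
ExtGrCNNMatchScorer.save('grcnn_scorer.pkl', best)   # hypothetical filename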
Example #3
class BRNNMatchScorer(object):
    '''
    Bidirectional RNN for the text matching task, trained with a pairwise
    ranking loss.
    '''
    def __init__(self, config, verbose=True):
        # Construct two BRNNEncoders for matching two sentences
        self.encoderL = BRNNEncoder(config, verbose)
        self.encoderR = BRNNEncoder(config, verbose)
        # Link two parts
        self.params = []
        self.params += self.encoderL.params
        self.params += self.encoderR.params
        # Set up input
        # Note that there are three kinds of inputs altogether:
        # 1. inputL, inputR. This pair is used for computing the score after training.
        # 2. inputPL, inputPR. This pair is used for training on positive pairs.
        # 3. inputNL, inputNR. This pair is used for training on negative pairs.
        self.inputL = self.encoderL.input
        self.inputR = self.encoderR.input
        # Positive
        self.inputPL = T.matrix(name='inputPL', dtype=floatX)
        self.inputPR = T.matrix(name='inputPR', dtype=floatX)
        # Negative
        self.inputNL = T.matrix(name='inputNL', dtype=floatX)
        self.inputNR = T.matrix(name='inputNR', dtype=floatX)
        # Get output of two BRNNEncoders
        self.hiddenL = self.encoderL.output
        self.hiddenR = self.encoderR.output
        # Positive Hidden
        self.hiddenPL = self.encoderL.encode(self.inputPL)
        self.hiddenPR = self.encoderR.encode(self.inputPR)
        # Negative Hidden
        self.hiddenNL = self.encoderL.encode(self.inputNL)
        self.hiddenNR = self.encoderR.encode(self.inputNR)
        # Activation function
        self.act = Activation(config.activation)
        self.hidden = T.concatenate([self.hiddenL, self.hiddenR], axis=0)
        self.hiddenP = T.concatenate([self.hiddenPL, self.hiddenPR], axis=0)
        self.hiddenN = T.concatenate([self.hiddenNL, self.hiddenNR], axis=0)
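        # Unlike the axis=1 concatenations in the models above, the BRNN encoder
        # outputs are joined along axis 0, presumably because each encoder emits a
        # single per-sentence vector rather than a batched matrix.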
        # Build hidden layer
        self.hidden_layer = HiddenLayer(
            self.hidden, (4 * config.num_hidden, config.num_mlp),
            act=Activation(config.hiddenact))
        self.compressed_hidden = self.hidden_layer.output
        self.compressed_hiddenP = self.hidden_layer.encode(self.hiddenP)
        self.compressed_hiddenN = self.hidden_layer.encode(self.hiddenN)
        # Accumulate parameters
        self.params += self.hidden_layer.params
        # Dropout parameter
        srng = T.shared_randomstreams.RandomStreams(config.random_seed)
        mask = srng.binomial(n=1,
                             p=1 - config.dropout,
                             size=self.compressed_hidden.shape)
        maskP = srng.binomial(n=1,
                              p=1 - config.dropout,
                              size=self.compressed_hiddenP.shape)
        maskN = srng.binomial(n=1,
                              p=1 - config.dropout,
                              size=self.compressed_hiddenN.shape)
        self.compressed_hidden *= T.cast(mask, floatX)
        self.compressed_hiddenP *= T.cast(maskP, floatX)
        self.compressed_hiddenN *= T.cast(maskN, floatX)
        # Score layer
        self.score_layer = ScoreLayer(self.compressed_hidden, config.num_mlp)
        self.output = self.score_layer.output
        self.scoreP = self.score_layer.encode(self.compressed_hiddenP)
        self.scoreN = self.score_layer.encode(self.compressed_hiddenN)
        # Accumulate parameters
        self.params += self.score_layer.params
        # Build cost function
        self.cost = T.mean(
            T.maximum(T.zeros_like(self.scoreP),
                      1.0 - self.scoreP + self.scoreN))
        # Construct the gradient of the cost function with respect to the model parameters
        self.gradparams = T.grad(self.cost, self.params)
        # Compute the total number of parameters in the model
        self.num_params_encoder = self.encoderL.num_params + self.encoderR.num_params
        # The hidden layer maps 4*num_hidden concatenated features to num_mlp units
        self.num_params_classifier = 4 * config.num_hidden * config.num_mlp + config.num_mlp + \
                                     config.num_mlp + 1
        self.num_params = self.num_params_encoder + self.num_params_classifier
        # Build class functions
        self.score = theano.function(inputs=[self.inputL, self.inputR],
                                     outputs=self.output)
        # Compute the gradient of the objective function and cost and prediction
        self.compute_cost_and_gradient = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=self.gradparams + [self.cost, self.scoreP, self.scoreN])
        # Output function for debugging purpose
        self.show_scores = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=[self.scoreP, self.scoreN])
        self.show_hiddens = theano.function(
            inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
            outputs=[self.hiddenP, self.hiddenN])
        if verbose:
            logger.debug(
                'Finished building the BRNNMatchScorer architecture; summary below:'
            )
            logger.debug('Input dimension: %d' % config.num_input)
            logger.debug('Hidden dimension of RNN: %d' % config.num_hidden)
            logger.debug('Hidden dimension of MLP: %d' % config.num_mlp)
            logger.debug('There are 2 BRNNEncoders used in the model.')
            logger.debug('Total number of parameters in this model: %d' %
                         self.num_params)

    def update_params(self, grads, learn_rate):
        '''
        @grads: [np.ndarray]. List of numpy.ndarray holding the gradients of the
                corresponding model parameters.
        @learn_rate: scalar. Learning rate.
        '''
        for param, grad in zip(self.params, grads):
            p = param.get_value(borrow=True)
            param.set_value(p - learn_rate * grad, borrow=True)

    def set_params(self, params):
        '''
        @params: [np.ndarray]. List of numpy.ndarray to set the model parameters.
        '''
        for shared, value in zip(self.params, params):
            shared.set_value(value, borrow=True)

    def deepcopy(self, brnn):
        '''
        @brnn: BRNNMatchScorer. Copy the model parameters of another BRNNMatchScorer.
        '''
        assert len(self.params) == len(brnn.params)
        for shared, param in zip(self.params, brnn.params):
            shared.set_value(param.get_value())

    @staticmethod
    def save(fname, model):
        '''
        @fname: String. Filename to store the model.
        @model: BRNNMatchScorer. An instance of BRNNMatchScorer to be saved.
        '''
        with open(fname, 'wb') as fout:
            cPickle.dump(model, fout)

    @staticmethod
    def load(fname):
        '''
        @fname: String. Filename to load the model.
        '''
        with open(fname, 'rb') as fin:
            model = cPickle.load(fin)
        return model
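For reference, a small numpy cross-check of the margin ranking cost shared by all
three models above; it mirrors the Theano expression, with the margin of 1.0 made
an explicit parameter.

import numpy as np

def margin_ranking_cost(scoreP, scoreN, margin=1.0):
    # mean(max(0, margin - scoreP + scoreN)), elementwise over a batch of scores
    return np.maximum(0.0, margin - scoreP + scoreN).mean()

# Example: a well-separated pair (positive score above negative by >= margin)
# incurs zero cost.
assert margin_ranking_cost(np.array([2.0]), np.array([0.5])) == 0.0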