Example #1
    def __init__(self,
                 filters,
                 kernel_size,
                 stride=1,
                 padding='same',
                 activation=None):
        """
		Params:
		filters: Number of Filters
		kernel_size: shape of the kernel
		stride: the stride
		padding: valid or same
		activation: activation function
		"""
        self.filters = filters

        num_weights = kernel_size[0] * kernel_size[1]  # weights per filter (unused in this snippet)
        self.kernel_size = kernel_size
        self.weights = None
        self.bias = None

        self.padding = (kernel_size[0] - 1) // 2 if padding == 'same' else 0
        self.stride = stride
        self.output_units = []

        self.activation = Activation(activation)
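
A minimal usage sketch of this layer (the enclosing class name is not shown in the snippet, so `ConvLayer` below is hypothetical): with a 3x3 kernel and padding='same', the stored padding works out to (3 - 1) // 2 = 1.

# Hypothetical usage; the class name `ConvLayer` is assumed, since the
# snippet only shows __init__.
conv = ConvLayer(filters=32, kernel_size=(3, 3), stride=1,
                 padding='same', activation='relu')
assert conv.padding == 1  # (3 - 1) // 2: preserves spatial size at stride 1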
Example #2
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 activation,
                 prefix="",
                 initializer=default_initializer,
                 dropout=0,
                 verbose=True):
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim
        self.out_dim = hidden_dim
        self.act = Activation(activation)
        self.dropout = dropout
        self.W = shared_rand_matrix((self.hidden_dim, self.in_dim),
                                    prefix + 'W', initializer)
        self.b = shared_zero_matrix((self.hidden_dim, ), prefix + 'b')
        self.params = [self.W, self.b]
        self.norm_params = [self.W]
        self.l1_norm = T.sum(
            [T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = T.sum([T.sum(param**2) for param in self.norm_params])

        if verbose:
            logger.debug('Architecture of {} built finished'.format(
                self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
Example #3
    def __init__(self,
                 entity_dim,
                 relation_num,
                 activation='iden',
                 initializer=default_initializer,
                 prefix='',
                 verbose=True):
        super(TransEModel, self).__init__()
        self.entity_dim = entity_dim
        self.relation_num = relation_num
        # (relation_num, entity_dim)
        self.W = shared_rand_matrix((relation_num, self.entity_dim),
                                    prefix + 'TransE_R', initializer)
        self.act = Activation(activation)
        self.params = [self.W]
        self.norm_params = [self.W]
        self.l1_norm = T.sum(T.abs_(self.W))
        self.l2_norm = T.sum(self.W**2)

        if verbose:
            logger.debug(
                'Architecture of TransE Model built finished, summarized as below:'
            )
            logger.debug('Entity Dimension: %d' % self.entity_dim)
            logger.debug('Relation Number:  %d' % self.relation_num)
            logger.debug('Initializer:      %s' % initializer)
            logger.debug('Activation:       %s' % activation)
Example #4
    def feedforward(self, x):
        self.layer_1_output = Activation.sigmoid(np.dot(x, self.weights_1))
        self.layer_2_output = Activation.sigmoid(
            np.dot(self.layer_1_output, self.weights_2))

        self.output = self.layer_2_output
        return self.output
Example #5
    def __init__(self, in_dim, activation, hidden_dim=None, transform_gate="sigmoid", prefix="",
                 initializer=default_initializer, dropout=0, verbose=True):
        # By construction the dimensions of in_dim and out_dim have to match, and hence W_T and W_H are square matrices.
        if hidden_dim is not None:
            assert in_dim == hidden_dim
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        super(HighwayLayer, self).__init__(in_dim, in_dim, activation, prefix, initializer, dropout, verbose)
        self.transform_gate = Activation(transform_gate)
        self.W_H, self.W_H.name = self.W, prefix + "W_H"
        self.b_H, self.b_H.name = self.b, prefix + "b_H"
        self.W_T = shared_rand_matrix((self.hidden_dim, self.in_dim), prefix + 'W_T', initializer)
        self.b_T = shared_zero_matrix((self.hidden_dim,), prefix + 'b_T')
        self.params = [self.W_H, self.W_T, self.b_H, self.b_T]
        self.norm_params = [self.W_H, self.W_T]
        self.l1_norm = T.sum([T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = T.sum([T.sum(param ** 2) for param in self.norm_params])

        if verbose:
            logger.debug('Architecture of {} built finished'.format(self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Transform Gate:   %s' % self.transform_gate.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
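
The layer above wires up the standard highway computation y = T(x) * H(x) + (1 - T(x)) * x. A minimal NumPy sketch of that forward pass, assuming a tanh activation and the default sigmoid transform gate (the actual forward method is not shown in the snippet):

import numpy as np

def highway_forward(x, W_H, b_H, W_T, b_T):
    # Sketch only: assumes tanh activation and sigmoid transform gate.
    # x: (dim,); W_H and W_T are (dim, dim), square as the comment above notes.
    h = np.tanh(W_H @ x + b_H)                  # candidate update H(x)
    t = 1.0 / (1.0 + np.exp(-(W_T @ x + b_T)))  # transform gate T(x)
    return t * h + (1.0 - t) * x                # carry (1 - t) of the input through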
Example #6
    def __init__(self, configs=None, verbose=True):
        '''
        Basic RNN is an unsupervised component: the input is a sequence and
        the output is a fixed-length vector.
        '''
        if verbose: pprint('Build Recurrent Neural Network...')
        self.input = T.matrix(name='input', dtype=floatX)
        self.learn_rate = T.scalar(name='learn rate')
        # Configure activation function
        self.act = Activation(configs.activation)
        fan_in = configs.num_input
        fan_out = configs.num_hidden
        # Initialize all the variables in RNN, including:
        # 1, Feed-forward matrix, feed-forward bias, W, W_b
        # 2, Recurrent matrix, recurrent bias, U, U_b
        self.W = theano.shared(value=np.asarray(np.random.uniform(
            low=-np.sqrt(6.0 / (fan_in + fan_out)),
            high=np.sqrt(6.0 / (fan_in + fan_out)),
            size=(fan_in, fan_out)),
                                                dtype=floatX),
                               name='W',
                               borrow=True)
        self.U = theano.shared(value=np.asarray(np.random.uniform(
            low=-np.sqrt(6.0 / (fan_out + fan_out)),
            high=np.sqrt(6.0 / (fan_out + fan_out)),
            size=(fan_out, fan_out)),
                                                dtype=floatX),
                               name='U',
                               borrow=True)
        # Bias parameter for the hidden-layer encoder of RNN
        self.b = theano.shared(value=np.zeros(fan_out, dtype=floatX),
                               name='b',
                               borrow=True)
        # h[0], zero vector
        self.h0 = theano.shared(value=np.zeros(fan_out, dtype=floatX),
                                name='h0',
                                borrow=True)
        # Save all the parameters
        self.params = [self.W, self.U, self.b, self.h0]

        # recurrent function used to compress a sequence of input vectors
        # the first dimension should correspond to time
        def step(x_t, h_tm1):
            h_t = self.act.activate(T.dot(x_t, self.W) + \
                  T.dot(h_tm1, self.U) + self.b)
            return h_t

        # h is the hidden representation over a time sequence
        self.hs, _ = theano.scan(fn=step,
                                 sequences=self.input,
                                 outputs_info=[self.h0],
                                 truncate_gradient=configs.bptt)
        self.h = self.hs[-1]
        # L1, L2 regularization
        self.L1_norm = T.sum(T.abs_(self.W) + T.abs_(self.U))
        self.L2_norm = T.sum(self.W**2) + T.sum(self.U**2)
        # Compress function
        self.compress = theano.function(inputs=[self.input], outputs=self.h)
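
What `compress` computes, restated as a plain NumPy sketch of the same recurrence (for clarity only; not code from the source):

import numpy as np

def compress(xs, W, U, b, act=np.tanh):
    # xs: (T, num_input); the first dimension is time, mirroring theano.scan above.
    h = np.zeros(U.shape[0])          # h0: zero vector of size num_hidden
    for x_t in xs:
        h = act(x_t @ W + h @ U + b)  # h_t = act(x_t W + h_{t-1} U + b)
    return h                          # final hidden state, like self.hs[-1]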
Example #7
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 initializer=default_initializer,
                 normalize=True,
                 dropout=0,
                 reconstructe=True,
                 activation="tanh",
                 verbose=True):
        """
        :param in_dim:          输入维度
        :param hidden_dim:      隐层维度
        :param initializer:     随机初始化器
        :param normalize:       是否归一化
        :param dropout:         dropout率
        :param activation:      激活函数
        :param verbose:         是否输出Debug日志内容
        :return:
        """
        self.in_dim = in_dim
        self.out_dim = hidden_dim
        self.hidden_dim = hidden_dim
        assert self.in_dim == self.hidden_dim

        self.initializer = initializer
        self.normalize = normalize
        self.dropout = dropout
        self.verbose = verbose
        self.act = Activation(activation)
        # Composition Function Weight
        # (dim, 2 * dim)
        self.W = shared_rand_matrix((self.hidden_dim, 2 * self.in_dim),
                                    'W',
                                    initializer=initializer)
        # (dim, )
        self.b = shared_zero_matrix((self.hidden_dim, ), 'b')
        # Reconstruction Function Weight
        # (2 * dim, dim)
        self.Wr = shared_rand_matrix((2 * self.in_dim, self.hidden_dim),
                                     'Wr',
                                     initializer=initializer)
        # (2 * dim, )
        self.br = shared_zero_matrix((self.in_dim * 2, ), 'br')
        self.params = [self.W, self.b, self.Wr, self.br]
        self.norm_params = [self.W, self.Wr]

        self.l1_norm = sum(
            [T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = sum([T.sum(param**2) for param in self.norm_params])

        if verbose:
            logger.debug(
                'Architecture of RAE built finished, summarized as below: ')
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Normalize:        %s' % self.normalize)
            logger.debug('Activation:       %s' % self.act)
            logger.debug('Dropout Rate:     %s' % self.dropout)
Example #8
    def __init__(self, in_dim, hidden_dim, kernel_size=3, padding='same', pooling='max', dilation_rate=1.0,
                 activation='relu', prefix="", initializer=GlorotUniformInitializer(), dropout=0.0, verbose=True):
        """
        Init Function for ConvolutionLayer
        :param in_dim:
        :param hidden_dim:
        :param kernel_size:
        :param padding: 'same', 'valid'
        :param pooling: 'max', 'mean', 'min'
        :param dilation_rate:
        :param activation:
        :param prefix:
        :param initializer:
        :param dropout:
        :param verbose:
        """
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))

        self.in_dim = in_dim
        self.out_dim = hidden_dim
        self.hidden_dim = hidden_dim
        self.kernel_size = kernel_size
        self.padding = padding
        self.dilation_rate = dilation_rate
        self.pooling = pooling
        self.dropout = dropout
        self.act = Activation(activation)
        self.padding_size = int(self.dilation_rate * (self.kernel_size - 1))
        # Composition Function Weight
        # Kernel Matrix (kernel_size, hidden, in)
        self.W = shared_rand_matrix((self.kernel_size, self.hidden_dim, self.in_dim), prefix + 'W', initializer)
        # Bias Term (hidden)
        self.b = shared_zero_matrix((self.hidden_dim,), prefix + 'b')

        self.params = [self.W, self.b]
        self.norm_params = [self.W]

        # L1, L2 Norm
        self.l1_norm = T.sum(T.abs_(self.W))
        self.l2_norm = T.sum(self.W ** 2)

        if verbose:
            logger.debug('Architecture of {} built finished'.format(self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Filter Num  (Hidden): %d' % self.hidden_dim)
            logger.debug('Kernel Size (Windows): %d' % self.kernel_size)
            logger.debug('Padding method :  %s' % self.padding)
            logger.debug('Dilation Rate  :  %s' % self.dilation_rate)
            logger.debug('Padding Size   :  %s' % self.padding_size)
            logger.debug('Pooling method :  %s' % self.pooling)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
Example #9
    def __init__(self,
                 entity_dim,
                 relation_num,
                 activation='tanh',
                 hidden=5,
                 keep_normal=False,
                 initializer=default_initializer,
                 prefix='',
                 verbose=True):
        super(NeuralTensorModel, self).__init__()
        self.entity_dim = entity_dim
        self.relation_num = relation_num
        self.hidden = hidden
        self.slice_seq = T.arange(hidden)
        self.keep_normal = keep_normal
        # (relation_num, entity_dim, entity_dim, hidden)
        self.W = shared_rand_matrix(
            (relation_num, self.entity_dim, self.entity_dim, self.hidden),
            prefix + 'NTN_W', initializer)
        # (relation_num, hidden)
        self.U = shared_ones_matrix((relation_num, self.hidden),
                                    name=prefix + 'NTN_U')
        if keep_normal:
            # (relation_num, 2 * entity_dim, hidden)
            self.V = shared_rand_matrix(
                (relation_num, self.entity_dim * 2, self.hidden),
                prefix + 'NTN_V', initializer)
            # (relation_num, hidden)
            self.b = shared_zero_matrix((relation_num, self.hidden),
                                        name=prefix + 'NTN_B')
            self.params = [self.W, self.V, self.U, self.b]
            self.norm_params = [self.W, self.V, self.U, self.b]
        else:
            self.params = [self.W]
            self.norm_params = [self.W]
        self.act = Activation(activation)
        self.l1_norm = T.sum(
            [T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = T.sum([T.sum(param**2) for param in self.norm_params])

        if verbose:
            logger.debug(
                'Architecture of Tensor Model built finished, summarized as below:'
            )
            logger.debug('Entity Dimension: %d' % self.entity_dim)
            logger.debug('Hidden Dimension: %d' % self.hidden)
            logger.debug('Relation Number:  %d' % self.relation_num)
            logger.debug('Initializer:      %s' % initializer)
            logger.debug('Activation:       %s' % activation)
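
The `(entity_dim, entity_dim, hidden)` slices implement the Neural Tensor Network score g(e1, r, e2) = u_r' act(e1' W_r e2 + V_r [e1; e2] + b_r). A NumPy sketch for a single relation (the scoring method itself is not shown in the snippet):

import numpy as np

def ntn_score(e1, e2, W_r, V_r, b_r, u_r, act=np.tanh):
    # W_r: (d, d, k) bilinear tensor slices; V_r: (2d, k); b_r, u_r: (k,)
    bilinear = np.einsum('i,ijk,j->k', e1, W_r, e2)  # (k,) one value per slice
    linear = np.concatenate([e1, e2]) @ V_r          # (k,)
    return u_r @ act(bilinear + linear + b_r)        # scalar score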
Example #10
class HiddenLayer(object):
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 activation,
                 prefix="",
                 initializer=default_initializer,
                 dropout=0,
                 verbose=True):
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim
        self.out_dim = hidden_dim
        self.act = Activation(activation)
        self.dropout = dropout
        self.W = shared_rand_matrix((self.hidden_dim, self.in_dim),
                                    prefix + 'W', initializer)
        self.b = shared_zero_matrix((self.hidden_dim, ), prefix + 'b')
        self.params = [self.W, self.b]
        self.norm_params = [self.W]
        self.l1_norm = T.sum(
            [T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = T.sum([T.sum(param**2) for param in self.norm_params])

        if verbose:
            logger.debug('Architecture of {} built finished'.format(
                self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)

    def forward(self, x):
        """
        :param x: (dim, )
        """
        output = self.act.activate(T.dot(self.W, x) + self.b)
        return dropout_from_layer(output, self.dropout)

    def forward_batch(self, x):
        """
        :param x: (batch, dim)
        """
        # (batch, in) (in, hidden) + (None, hidden) -> (batch, hidden)
        output = self.act.activate(T.dot(x, self.W.T) + self.b)
        return dropout_from_layer(output, self.dropout)
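
The two paths compute the same affine map: `forward` applies `W` to one vector, while `forward_batch` right-multiplies by `W.T` so every row of the batch is transformed identically. A NumPy check of that equivalence:

import numpy as np

hidden_dim, in_dim, batch = 4, 3, 5
W = np.random.randn(hidden_dim, in_dim)
b = np.random.randn(hidden_dim)
X = np.random.randn(batch, in_dim)

single = np.stack([W @ x + b for x in X])  # forward(), one row at a time
batched = X @ W.T + b                      # forward_batch(), one matmul
assert np.allclose(single, batched)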
Example #11
def train(dataset):
    config_options = globals.config
    task_path = config_options.get("Data", dataset)
    loss = config_options.get('Train', 'loss')
    activation = config_options.get('Train', 'activation')

    if dataset == "classify":
        Xtrain = z_norm(load_mnist_X(task_path + "classf_Xtrain.txt"))
        Xtest = z_norm(load_mnist_X(task_path + "classf_Xtest.txt"))
        Xval = z_norm(load_mnist_X(task_path + "classf_XVal.txt"))
        ytrain = load_mnist_Y(task_path + "classf_ytrain.txt")
        ytest = load_mnist_Y(task_path + "classf_ytest.txt")
        yval = load_mnist_Y(task_path + "classf_yVal.txt")
    elif dataset == "regression":
        Xtrain = z_norm(load_regression_X(task_path + "regr_Xtrain.txt"))
        Xtest = z_norm(load_regression_X(task_path + "regr_Xtest.txt"))
        Xval = z_norm(load_regression_X(task_path + "regr_Xval.txt"))
        ytrain = load_regression_Y(task_path + "regr_ytrain.txt")
        ytest = load_regression_Y(task_path + "regr_ytest.txt")
        yval = load_regression_Y(task_path + "regr_yval.txt")
    else:
        logger.warning("Invalid task.")
        return
    logger.info("Load data complete.")

    # build model
    N, input_dim = Xtrain.shape

    model = Model()
    model.add(Layer(output_dim=globals.layer_dim, input_dim=input_dim))
    model.add(Activation(activation=activation))
    model.add(Layer(output_dim=globals.output_dim))

    model.compile(loss=loss)
    history = model.fit(Xtrain,
                        ytrain,
                        batch_size=N,
                        iterations=globals.iterations,
                        validation_data=(Xval, yval))

    # save result
    result_dir = config_options.get('Result', 'result-dir')
    file_name = "_".join([
        dataset, activation,
        str(globals.alpha),
        str(globals.lam),
        str(globals.layer_dim),
        str(globals.iterations)
    ]) + ".txt"
    file_path = result_dir + file_name
    writeFile(file_path, "")
    for datum in history:
        datum = [str(x) for x in datum]
        line = "\t".join(datum) + "\n"
        writeFile(file_path, line, 'a')

    print(model.loss.mse(Xval, yval))
    print(model.loss.mse(Xtest, ytest))
Example #12
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 pooling,
                 activation='tanh',
                 prefix="",
                 initializer=default_initializer,
                 dropout=0,
                 verbose=True):
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        super(RecurrentEncoder, self).__init__(in_dim, hidden_dim, pooling,
                                               activation, dropout)

        self.in_dim = in_dim
        self.out_dim = hidden_dim
        self.hidden_dim = hidden_dim
        self.pooling = pooling
        self.dropout = dropout
        self.act = Activation(activation)
        # Composition Function Weight
        # Feed-Forward Matrix (hidden, in)
        self.W = shared_rand_matrix((self.hidden_dim, self.in_dim),
                                    prefix + 'W_forward', initializer)
        # Bias Term (hidden)
        self.b = shared_zero_matrix((self.hidden_dim, ), prefix + 'b_forward')
        # Recurrent Matrix (hidden, hidden)
        self.U = shared_rand_matrix((self.hidden_dim, self.hidden_dim),
                                    prefix + 'U_forward', initializer)

        self.params = [self.W, self.U, self.b]
        self.norm_params = [self.W, self.U]

        # L1, L2 Norm
        self.l1_norm = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.U))
        self.l2_norm = T.sum(self.W**2) + T.sum(self.U**2)

        if verbose:
            logger.debug('Architecture of {} built finished'.format(
                self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Pooling methods:  %s' % self.pooling)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
Example #13
    def __init__(self, in_dim, hidden_dim, kernel_sizes=[3, 4, 5], padding='same', pooling='max', dilation_rate=1.0,
                 activation='relu', prefix="", initializer=GlorotUniformInitializer(), dropout=0.0, verbose=True):
        """
        Init Function for ConvolutionLayer
        :param in_dim:
        :param hidden_dim:
        :param kernel_sizes:
        :param padding: 'same', 'valid'
        :param pooling: 'max', 'mean', 'min'
        :param dilation_rate:
        :param activation:
        :param prefix:
        :param initializer:
        :param dropout:
        :param verbose:
        """
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        self.conv_layers = list()
        self.in_dim = in_dim
        self.out_dim = hidden_dim * len(kernel_sizes)
        self.hidden_dim = hidden_dim
        self.kernel_sizes = kernel_sizes
        self.padding = padding
        self.dilation_rate = dilation_rate
        self.pooling = pooling
        self.dropout = dropout
        self.act = Activation(activation)

        self.params = list()
        self.norm_params = list()

        # L1, L2 Norm
        self.l1_norm = 0
        self.l2_norm = 0

        for filter_hs in kernel_sizes:
            self.conv_layers.append(ConvolutionLayer(in_dim=self.in_dim, hidden_dim=hidden_dim, kernel_size=filter_hs,
                                                     padding=self.padding, pooling=self.pooling,
                                                     dilation_rate=self.dilation_rate, activation=activation,
                                                     prefix=prefix+"filter%s_" % filter_hs, initializer=initializer,
                                                     dropout=dropout, verbose=verbose))
            self.params += self.conv_layers[-1].params
            self.norm_params += self.conv_layers[-1].norm_params
            self.l1_norm += self.conv_layers[-1].l1_norm
            self.l2_norm += self.conv_layers[-1].l2_norm

        if verbose:
            logger.debug('Architecture of {} built finished'.format(self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Filter Num  (Hidden): %d' % self.hidden_dim)
            logger.debug('Kernel Size (Windows): %s' % self.kernel_sizes)
            logger.debug('Padding method :  %s' % self.padding)
            logger.debug('Dilation Rate  :  %s' % self.dilation_rate)
            logger.debug('Pooling method :  %s' % self.pooling)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
Example #14
 def __init__(self, verbose=True):
     if verbose: logger.debug('Build Multilayer Perceptron Ranking model...')
     # Positive input setting
     self.inputPL = T.matrix(name='inputPL', dtype=floatX)
     self.inputPR = T.matrix(name='inputPR', dtype=floatX)
     # Negative input setting
     self.inputNL = T.matrix(name='inputNL', dtype=floatX)
     self.inputNR = T.matrix(name='inputNR', dtype=floatX)
     # Standard input setting
     self.inputL = T.matrix(name='inputL', dtype=floatX)
     self.inputR = T.matrix(name='inputR', dtype=floatX)
     # Build activation function
     self.act = Activation('tanh')
     # Connect input matrices
     self.inputP = T.concatenate([self.inputPL, self.inputPR], axis=1)
     self.inputN = T.concatenate([self.inputNL, self.inputNR], axis=1)
     self.input = T.concatenate([self.inputL, self.inputR], axis=1)
     # Build hidden layer
     self.hidden_layer = HiddenLayer(self.input, (2*edim, args.hidden), act=self.act)
     self.hidden = self.hidden_layer.output
     self.hiddenP = self.hidden_layer.encode(self.inputP)
     self.hiddenN = self.hidden_layer.encode(self.inputN)
     # Dropout parameter
     #srng = T.shared_randomstreams.RandomStreams(args.seed)
     #mask = srng.binomial(n=1, p=1-args.dropout, size=self.hidden.shape)
     #maskP = srng.binomial(n=1, p=1-args.dropout, size=self.hiddenP.shape)
     #maskN = srng.binomial(n=1, p=1-args.dropout, size=self.hiddenN.shape)
     #self.hidden *= T.cast(mask, floatX)
     #self.hiddenP *= T.cast(maskP, floatX)
     #self.hiddenN *= T.cast(maskN, floatX)
     # Build linear output layer
     self.score_layer = ScoreLayer(self.hidden, args.hidden)
     self.output = self.score_layer.output
     self.scoreP = self.score_layer.encode(self.hiddenP)
     self.scoreN = self.score_layer.encode(self.hiddenN)
     # Stack all the parameters
     self.params = []
     self.params += self.hidden_layer.params
     self.params += self.score_layer.params
     # Build cost function
     self.cost = T.mean(T.maximum(T.zeros_like(self.scoreP), 1.0-self.scoreP+self.scoreN))
     # Construct the gradient of the cost function with respect to the model parameters
     self.gradparams = T.grad(self.cost, self.params)
     # Count the total number of parameters in this model
     self.num_params = edim * args.hidden + args.hidden + args.hidden + 1
     # Build class method
     self.score = theano.function(inputs=[self.inputL, self.inputR], outputs=self.output)
     self.compute_cost_and_gradient = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
                                                      outputs=self.gradparams+[self.cost, self.scoreP, self.scoreN])
     self.show_scores = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR], 
                                        outputs=[self.scoreP, self.scoreN])
     if verbose:
         logger.debug('Architecture of MLP Ranker built finished, summarized below: ')
         logger.debug('Input dimension: %d' % edim)
         logger.debug('Hidden dimension: %d' % args.hidden)
         logger.debug('Total number of parameters used in the model: %d' % self.num_params)
Example #15
	def __init__(self, input_units, output_units, activation = None):
		"""
		Params:
		input_units = Number of input nodes
		output_units = Number of output nodes
		activation = The activation layer
		"""
		# self.weights = np.random.normal(0.0, 1.0/np.sqrt(input_units), (input_units, output_units))
		# self.bias = np.random.normal(0.0, 1.0/np.sqrt(input_units), (1, output_units))
		# self.weights = np.random.uniform(-0.01, 0.01, (input_units, output_units))
		self.weights = np.linspace(-0.01, 0.01, num = input_units*output_units)
		self.weights = self.weights.reshape((input_units, output_units))
		self.bias = np.zeros((1,output_units))
		self.activation = Activation(activation)

		# Initialize Other Things as Zero
		self.output_units = None
		self.grad_weights = 0
		self.grad_bias = 0
Example #16
    def __init__(self, input_size, output_size, hidden_size, n_layers,
                 act_type):
        '''
        Multilayer Perceptron
        ----------------------
        :param input_size: dimension of input features
        :param output_size: dimension of output features
        :param hidden_size: a list containing hidden size for each hidden layer
        :param n_layers: number of layers
        :param act_type: type of activation function for each hidden layer, can be none, sigmoid, tanh, or relu
        '''
        super(MLP, self).__init__()

        # total layer number should be hidden layer number + 1 (output layer)
        assert len(hidden_size) + 1 == n_layers, \
            'total layer number should be hidden layer number + 1'

        # define the activation function by activation function in activations.py
        self.act = Activation(act_type)

        # initialize a list to save layers
        layers = nn.ModuleList()

        if n_layers == 1:
            # if n_layers == 1, MLP degenerates to a Linear layer
            layer = Linear(input_size, output_size)
            # append the layer into layers
            layers.append(layer)
            layers.append(self.act)

        # TODO 4: Finish MLP with at least 2 layers
        else:
            # step 1: initialize the input layer
            layer = Linear(input_size, hidden_size[0])
            # step 2: append the input layer and the activation layer into layers
            layers.append(layer)
            layers.append(self.act)
            # step 3: construct the hidden layers and add it to layers
            for i in range(1, n_layers - 1):
                #initialize a hidden layer and activation layer
                # hint: Noting that the output size of a hidden layer is hidden_size[i], so what is its input size?
                layer = Linear(hidden_size[i - 1], hidden_size[i])
                layers.append(layer)
                layers.append(self.act)

            # step 4: initialize the output layer and append the layer into layers
            # hint: what is the output size of the output layer?
            # hint: here we do not need activation layer
            layer = Linear(hidden_size[-1], output_size)
            layers.append(layer)
            # End TODO 4

        #Use nn.Sequential to get the neural network
        self.net = nn.Sequential(*layers)
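
A usage sketch under the snippet's own constraint that `len(hidden_size) + 1 == n_layers`:

# Hypothetical sizes; Linear and Activation come from the surrounding
# project (activations.py), not from torch.nn directly.
mlp = MLP(input_size=784, output_size=10,
          hidden_size=[256, 128], n_layers=3, act_type='relu')
# Layers built: 784 -> 256 (+act), 256 -> 128 (+act), 128 -> 10 (no act)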
Example #17
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 pooling,
                 activation='tanh',
                 gates=("sigmoid", "sigmoid", "sigmoid"),
                 prefix="",
                 initializer=OrthogonalInitializer(),
                 dropout=0,
                 verbose=True):
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        super(LSTMEncoder, self).__init__(in_dim, hidden_dim, pooling,
                                          activation, dropout)
        self.in_gate, self.forget_gate, self.out_gate = Activation(
            gates[0]), Activation(gates[1]), Activation(gates[2])

        # W [in, forget, output, recurrent] (4 * hidden, in)
        self.W = shared_rand_matrix((self.hidden_dim * 4, self.in_dim),
                                    prefix + 'W', initializer)
        # U [in, forget, output, recurrent] (4 * hidden, hidden)
        self.U = shared_rand_matrix((self.hidden_dim * 4, self.hidden_dim),
                                    prefix + 'U', initializer)
        # b [in, forget, output, recurrent] (4 * hidden,)
        self.b = shared_zero_matrix((self.hidden_dim * 4, ), prefix + 'b')

        self.params = [self.W, self.U, self.b]
        self.l1_norm = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.U))
        self.l2_norm = T.sum(self.W**2) + T.sum(self.U**2)

        if verbose:
            logger.debug('Architecture of {} built finished'.format(
                self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Pooling methods:  %s' % self.pooling)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Input Gate:       %s' % self.in_gate.method)
            logger.debug('Forget Gate:      %s' % self.forget_gate.method)
            logger.debug('Output Gate:      %s' % self.out_gate.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
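
The stacked `(4 * hidden, ...)` matrices pack the input, forget, and output gates together with the candidate update. A NumPy sketch of how one step would slice them, assuming the `[in, forget, output, recurrent]` layout stated in the comments (the scan step itself is not shown in the snippet):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x, h, c, W, U, b, hidden_dim):
    # Assumed gate layout along the first axis: [in, forget, output, recurrent].
    pre = W @ x + U @ h + b                          # (4 * hidden,)
    i = sigmoid(pre[:hidden_dim])                    # input gate
    f = sigmoid(pre[hidden_dim:2 * hidden_dim])      # forget gate
    o = sigmoid(pre[2 * hidden_dim:3 * hidden_dim])  # output gate
    g = np.tanh(pre[3 * hidden_dim:])                # candidate ("recurrent")
    c_new = f * c + i * g                            # new cell state
    h_new = o * np.tanh(c_new)                       # new hidden state
    return h_new, c_new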
Example #18
    def __init__(self,
                 entity_dim,
                 relation_num,
                 hidden=50,
                 activation='tanh',
                 initializer=default_initializer,
                 prefix='',
                 verbose=True):
        super(SingleLayerModel, self).__init__()
        self.hidden = hidden
        self.entity_dim = entity_dim
        self.relation_num = relation_num
        # (relation_num, k, entity_dim)
        self.W_1 = shared_rand_matrix(
            (relation_num, self.hidden, self.entity_dim),
            prefix + 'SingleLayer_W1', initializer)
        # (relation_num, k, entity_dim)
        self.W_2 = shared_rand_matrix(
            (relation_num, self.hidden, self.entity_dim),
            prefix + 'SingleLayer_W2', initializer)
        # (relation_num, hidden)
        self.u = shared_ones_matrix((relation_num, self.hidden),
                                    prefix + 'SingleLayer_u')
        self.act = Activation(activation)
        self.params = [self.W_1, self.W_2, self.u]
        self.norm_params = [self.W_1, self.W_2, self.u]
        self.l1_norm = T.sum(T.abs_(self.W_1)) + T.sum(T.abs_(self.W_2)) + \
            T.sum(T.abs_(self.u))
        self.l2_norm = T.sum(self.W_1 ** 2) + T.sum(self.W_2 ** 2) + \
            T.sum(self.u ** 2)

        if verbose:
            logger.debug(
                'Architecture of Single Layer Model built finished, summarized as below:'
            )
            logger.debug('Entity Dimension: %d' % self.entity_dim)
            logger.debug('Hidden Dimension: %d' % self.hidden)
            logger.debug('Relation Number:  %d' % self.relation_num)
            logger.debug('Initializer:      %s' % initializer)
            logger.debug('Activation:       %s' % activation)
Example #19
 def __init__(self,
              in_dim,
              hidden_dim,
              pooling,
              activation='tanh',
              dropout=0):
     self.in_dim = in_dim
     self.out_dim = hidden_dim
     self.hidden_dim = hidden_dim
     self.pooling = pooling
     self.dropout = dropout
     self.act = Activation(activation)
Example #20
 def __init__(self,
              word_dim,
              seq_dim,
              hidden_dim,
              activation='tanh',
              initializer=default_initializer):
     super(NNWordBasedAttention,
           self).__init__(word_dim=word_dim,
                          seq_dim=seq_dim,
                          # pass the supplied initializer through instead of
                          # shadowing it with the module default
                          initializer=initializer)
     # (dim, dim)
     self.hidden_dim = hidden_dim
     self.W = shared_rand_matrix((self.word_dim, self.hidden_dim),
                                 'Attention_W', initializer)
     self.U = shared_rand_matrix((self.seq_dim, self.hidden_dim),
                                 'Attention_U', initializer)
     self.v = shared_rand_matrix((self.hidden_dim, ), 'Attention_v',
                                 initializer)
     self.act = Activation(activation)
     self.params = [self.W, self.U, self.v]  # include U and v so they are trained
     self.norm_params = [self.W]
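
This sets up additive (Bahdanau-style) attention: each sequence position j is scored as v . act(W . word + U . seq_j). A NumPy sketch of the scoring (the actual forward method is not shown in the snippet):

import numpy as np

def attention_scores(word, seq, W, U, v, act=np.tanh):
    # word: (word_dim,), seq: (T, seq_dim); W: (word_dim, hidden_dim),
    # U: (seq_dim, hidden_dim), v: (hidden_dim,).
    hidden = act(word @ W + seq @ U)  # (T, hidden_dim), broadcast over positions
    return hidden @ v                 # (T,) unnormalized scores, one per position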
Example #21
    def __init__(self, name, n_inputs, n_outputs, activation=None, use_bias=True, weights=None, biases=None):
        super().__init__(name)

        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.use_bias = use_bias

        if activation is None:
            activation = Activation.getInitialized("tanh")
        else:
            if not Activation.isObjectRegistered(activation):
                if isinstance(activation, dict):
                    activation = Activation(**activation)
                elif isinstance(activation, str):
                    activation = Activation(class_name=activation)
                else:
                    raise Exception("{} is not a "\
                    "registered activation. Use {}".format(activation, Activation.registeredClasses()))

        self.activation = activation


        if weights is None:
            # Between -1 and 1
            self.weights = (np.random.random((n_outputs, n_inputs)) * 2 - 1)
        else:
            assert isinstance(weights, np.ndarray)
            assert weights.shape == (n_outputs, n_inputs)
            self.weights = weights

        if biases is None:
            # Between -1 and 1
            self.biases = (np.random.random((n_outputs, 1)) * 2 - 1) * 0.001
        else:
            assert isinstance(biases, np.ndarray)
            assert biases.shape == (n_outputs, 1)
            self.biases = biases

        # Mutation mask ... create only once.
        self.mutation_mask = np.zeros_like(self.weights)
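
A usage sketch of the three accepted `activation` forms (the concrete layer class is not named in the snippet; `DenseLayer` is assumed):

# All three forms resolve to an Activation instance inside __init__:
layer_a = DenseLayer("fc1", n_inputs=4, n_outputs=3)                     # default: tanh
layer_b = DenseLayer("fc2", n_inputs=4, n_outputs=3, activation="relu")  # by class name
layer_c = DenseLayer("fc3", n_inputs=4, n_outputs=3,
                     activation={"class_name": "relu"})                  # from a config dict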
Example #22
    def load(self, folder):
        # deduce all import parameters from saved file
        try:
            # load weights & biases
            self.weight = np.load(f"{folder}/weight.npy")
            self.bias  = np.load(f"{folder}/bias.npy")

            # load neurons, layers, activation_functions
            with open(f"{folder}/dense.json", "r") as file:
                data = json.load(file)

            self.inputDim = data["inputDim"]
            self.outputDim = data["outputDim"]
            self.activation = Activation.funcFromStr(data["activation"])

        except Exception as e:
            print(e)
Example #23
	def __init__(self, configs=None, verbose=True):
		'''
		Basic RNN is an unsupervised component: the input is a sequence and
		the output is a fixed-length vector.
		'''
		if verbose: pprint('Build Recurrent Neural Network...')
		self.input = T.matrix(name='input', dtype=floatX)
		self.learn_rate = T.scalar(name='learn rate')		
		# Configure activation function
		self.act = Activation(configs.activation)
		fan_in = configs.num_input
		fan_out = configs.num_hidden
		# Initialize all the variables in RNN, including:
		# 1, Feed-forward matrix, feed-forward bias, W, W_b
		# 2, Recurrent matrix, recurrent bias, U, U_b
		self.W = theano.shared(value=np.asarray(
					np.random.uniform(low=-np.sqrt(6.0/(fan_in+fan_out)),
									  high=np.sqrt(6.0/(fan_in+fan_out)), 
									  size=(fan_in, fan_out)), dtype=floatX),
					name='W', borrow=True)
		self.U = theano.shared(value=np.asarray(
					np.random.uniform(low=-np.sqrt(6.0/(fan_out+fan_out)),
									  high=np.sqrt(6.0/(fan_out+fan_out)),
									  size=(fan_out, fan_out)), dtype=floatX),
					name='U', borrow=True)
		# Bias parameter for the hidden-layer encoder of RNN
		self.b = theano.shared(value=np.zeros(fan_out, dtype=floatX), name='b', borrow=True)
		# h[0], zero vector
		self.h0 = theano.shared(value=np.zeros(fan_out, dtype=floatX), name='h0', borrow=True)
		# Save all the parameters
		self.params = [self.W, self.U, self.b, self.h0]
		# recurrent function used to compress a sequence of input vectors
		# the first dimension should correspond to time
		def step(x_t, h_tm1):
			h_t = self.act.activate(T.dot(x_t, self.W) + \
									T.dot(h_tm1, self.U) + self.b)
			return h_t
		# h is the hidden representation over a time sequence
		self.hs, _ = theano.scan(fn=step, sequences=self.input, outputs_info=[self.h0],
								truncate_gradient=configs.bptt)
		self.h = self.hs[-1]
		# L1, L2 regularization
		self.L1_norm = T.sum(T.abs_(self.W) + T.abs_(self.U))
		self.L2_norm = T.sum(self.W ** 2) + T.sum(self.U ** 2)
		# Compress function
		self.compress = theano.function(inputs=[self.input], outputs=self.h)
Example #24
 def __init__(self,
              in_features,
              out_features,
              input_layer=False,
              fully_connected=True):
     self.in_features = in_features
     self.out_features = out_features
     self.fully_connected = fully_connected
     # changed from v0.0.0 #
     self.weights = np.random.randn(out_features, in_features)
     self.bias = np.random.randn(out_features)
     # last part for emphasis #
     self.next_layer = None
     self.prev_layer = None
     self.input_layer = input_layer
     self.variables = 0
     self.activation = Activation()
Example #25
 def testAE(self):
     # Set parameters
     input = T.matrix(name='input')
     num_in, num_out = 784, 500
     act = Activation('sigmoid')
     is_denoising, is_sparse = True, False
     lambda1 = 1e-4
     mask = 0.7
     rng = RandomStreams(42)
     start_time = time.time()
     ae = AutoEncoder(input, (num_in, num_out),
                      act,
                      is_denoising,
                      is_sparse,
                      lambda1,
                      mask,
                      rng,
                      verbose=True)
     end_time = time.time()
     pprint('Time used to build the AutoEncoder: %f seconds.' %
            (end_time - start_time))
     batch_size = 1000
     num_batches = self.training_set.shape[0] / batch_size
     nepoch = 50
     learn_rate = 1
     start_time = time.time()
     for i in xrange(nepoch):
         rate = learn_rate
         for j in xrange(num_batches):
             train_set = self.training_set[j * batch_size:(j + 1) *
                                           batch_size, :]
             cost = ae.train(train_set, rate)
             pprint('epoch %d, batch %d, cost = %f' % (i, j, cost))
     end_time = time.time()
     pprint('Time used for training AutoEncoder: %f seconds.' %
            (end_time - start_time))
     image = PIL.Image.fromarray(
         imgutils.tile_raster_images(
             X=ae.encode_layer.W.get_value(borrow=True).T,
             img_shape=(28, 28),
             tile_shape=(10, 10),
             tile_spacing=(1, 1)))
     image.save('filters_corruption_%.2f.png' % mask)
     AutoEncoder.save('./autoencoder-mnist.model', ae)
Example #26
    def __init__(self, config, verbose=True):
        '''
        @config: GrCNNConfiger. Configer used to set the architecture of ExtGrCNNEncoder.
        '''
        if verbose: logger.debug('Building Extended Gated Recursive Convolutional Neural Network Encoder...')
        # Scale factor for initializing model parameters
        self.scale = config.scale
        # Make theano symbolic tensor for input and model parameters
        self.input = T.matrix(name='ExtGrCNNEncoder input', dtype=floatX)
        # Configure activation function
        self.act = Activation(config.activation)
        fan_in, fan_out = config.num_input, config.num_hidden
        # Initialize model parameter
        np.random.seed(config.random_seed)
        # Projection matrix U
        U_val = np.random.uniform(low=-1.0, high=1.0, size=(fan_in, fan_out))
        U_val = U_val.astype(floatX)
        U_val *= self.scale
        self.U = theano.shared(value=U_val, name='U', borrow=True)
        self.hidden0 = T.dot(self.input, self.U)
        # 3rd-tensor to implement the multi-gate GrCNN Encoders, where the first dimension corresponds
        # to the number of gates
        Wl_vals = [np.random.uniform(low=-1.0, high=1.0, size=(fan_out, fan_out)).astype(floatX) for _ in xrange(config.num_gates)]
        Wl_vals = [np.linalg.svd(Wl_val)[0] for Wl_val in Wl_vals]
        Wl_vals = np.asarray(Wl_vals)
        self.Wl = theano.shared(value=Wl_vals, name='W_l', borrow=True)

        Wr_vals = [np.random.uniform(low=-1.0, high=1.0, size=(fan_out, fan_out)).astype(floatX) for _ in xrange(config.num_gates)]
        Wr_vals = [np.linalg.svd(Wr_val)[0] for Wr_val in Wr_vals]
        Wr_vals = np.asarray(Wr_vals)
        self.Wr = theano.shared(value=Wr_vals, name='W_r', borrow=True)

        self.Wb = theano.shared(value=np.zeros((config.num_gates, fan_out), dtype=floatX), name='W_b', borrow=True)
        # Multi-gate choosing functions
        Gl_vals = np.random.uniform(low=-1.0, high=1.0, size=(fan_out, config.num_gates+2)).astype(floatX)
        self.Gl = theano.shared(value=Gl_vals, name='G_l', borrow=True)

        Gr_vals = np.random.uniform(low=-1.0, high=1.0, size=(fan_out, config.num_gates+2)).astype(floatX)
        self.Gr = theano.shared(value=Gr_vals, name='G_r', borrow=True)

        self.Gb = theano.shared(value=np.zeros(config.num_gates+2, dtype=floatX), name='G_b', borrow=True)
        # Stack all the model parameters
        self.params = [self.U, self.Wl, self.Wr, self.Wb, self.Gl, self.Gr, self.Gb]
        self.num_params = fan_in * fan_out + 2 * config.num_gates * fan_out * fan_out + config.num_gates * fan_out + \
                          2 * (config.num_gates+2) * fan_out + config.num_gates + 2
        # Length of the time sequence
        self.nsteps = self.input.shape[0]
        # Building ExtGrCNNEncoder pyramids
        self.pyramids, _ = theano.scan(fn=self._step_prop, 
                                    sequences=T.arange(self.nsteps-1),
                                    non_sequences=self.nsteps,
                                    outputs_info=[self.hidden0],
                                    n_steps=self.nsteps-1)
        self.output = self.pyramids[-1][0].dimshuffle('x', 0)
        # Compression -- Encoding function
        self.compress = theano.function(inputs=[self.input], outputs=self.output)
        if verbose:
            logger.debug('Finished constructing the structure of ExtGrCNN Encoder: ')
            logger.debug('Size of the input dimension: %d' % fan_in)
            logger.debug('Size of the hidden dimension: %d' % fan_out)
            logger.debug('Number of gating functions: %d' % config.num_gates)
            logger.debug('Number of parameters in ExtGrCNN: %d' % self.num_params)
            logger.debug('Activation function: %s' % config.activation)
Example #27
class GrCNNEncoder(object):
    '''
    (Binary) Gated Recursive Convolutional Neural Network Encoder.
    '''
    def __init__(self, config=None, verbose=True):
        '''
        @config: GRCNNConfiger. Configer used to set the architecture of GRCNNEncoder.
        ''' 
        if verbose: logger.debug('Building Gated Recursive Convolutional Neural Network Encoder...')
        # Scale factor for initializing parameters
        self.scale = config.scale
        # Make theano symbolic tensor for input and model parameters
        self.input = T.matrix(name='GrCNN Encoder input', dtype=floatX)
        # Configure activation function
        self.act = Activation(config.activation)
        fan_in, fan_out = config.num_input, config.num_hidden
        # Initialize model parameters
        # Set seed of the random generator
        np.random.seed(config.random_seed)
        # Projection matrix U
        # Initialize all the matrices using orthogonal matrices        
        U_val = np.random.uniform(low=-1.0, high=1.0, size=(fan_in, fan_out))
        U_val = U_val.astype(floatX)
        U_val *= self.scale
        self.U = theano.shared(value=U_val, name='U', borrow=True)
        self.hidden0 = T.dot(self.input, self.U)

        # W^l, W^r, parameters used to construct the central hidden representation
        Wl_val = np.random.uniform(low=-1.0, high=1.0, size=(fan_out, fan_out))
        Wl_val = Wl_val.astype(floatX)
        Wl_val, _, _ = np.linalg.svd(Wl_val)
        # Wl_val *= self.scale
        self.Wl = theano.shared(value=Wl_val, name='W_l', borrow=True)

        Wr_val = np.random.uniform(low=-1.0, high=1.0, size=(fan_out, fan_out))
        Wr_val = Wr_val.astype(floatX)
        Wr_val, _, _ = np.linalg.svd(Wr_val)
        # Wr_val *= self.scale
        self.Wr = theano.shared(value=Wr_val, name='W_r', borrow=True)
        
        self.Wb = theano.shared(value=np.zeros(fan_out, dtype=floatX), name='Wb', borrow=True)
        
        # G^l, G^r, parameters used to construct the three-way coefficients
        Gl_val = np.random.uniform(low=-1.0, high=1.0, size=(fan_out, 3))
        Gl_val = Gl_val.astype(floatX)
        self.Gl = theano.shared(value=Gl_val, name='G_l', borrow=True)

        Gr_val = np.random.uniform(low=-1.0, high=1.0, size=(fan_out, 3))
        Gr_val = Gr_val.astype(floatX)
        self.Gr = theano.shared(value=Gr_val, name='G_r', borrow=True)

        self.Gb = theano.shared(value=np.zeros(3, dtype=floatX), name='Gb', borrow=True)
        # Save all the parameters into one batch
        self.params = [self.U, self.Wl, self.Wr, self.Wb, self.Gl, self.Gr, self.Gb]
        # Compute the total number of parameters
        self.num_params = reduce(lambda x, y: x+np.prod(y.get_value().shape), self.params, 0)
        # Length of the time sequence
        self.nsteps = self.input.shape[0]
        self.pyramids, _ = theano.scan(fn=self._step_prop, 
                                    sequences=T.arange(self.nsteps-1),
                                    non_sequences=self.nsteps,
                                    outputs_info=[self.hidden0],
                                    n_steps=self.nsteps-1)
        self.output = self.pyramids[-1][0].dimshuffle('x', 0)
        # Compression -- Encoding function
        self.compress = theano.function(inputs=[self.input], outputs=self.output)
        if verbose:
            logger.debug('Finished constructing the structure of grCNN Encoder: ')
            logger.debug('Size of the input dimension: %d' % fan_in)
            logger.debug('Size of the hidden dimension: %d' % fan_out)
            logger.debug('Activation function: %s' % config.activation)

    def _step_prop(self, iter, current_level, nsteps):
        '''
        @current_level: Input matrix at current level. The first dimension corresponds to 
        the timestamp while the second dimension corresponds to the dimension of hidden representation
        '''
        # Build shifted matrix, due to the constraints of Theano.scan, we have to keep the shape of the
        # input and output matrix
        left_current_level = current_level[:nsteps-iter-1]
        right_current_level = current_level[1:nsteps-iter]
        # Compute temporary central hidden representation, of size Txd, but we only care about the first
        # T-1 rows, i.e., we only focus on the (T-1)xd sub-matrix.
        central_current_level = self.act.activate(T.dot(left_current_level, self.Wl) + 
                                                  T.dot(right_current_level, self.Wr) + 
                                                  self.Wb)
        # Compute gating function, of size Tx3. Again, due to the internal limitation of Theano.scan, we cannot
        # reduce the size of the matrix and have to keep the same size, but actually we only want the first (T-1)x3
        # sub-matrix.
        current_gates = T.nnet.softmax(T.dot(left_current_level, self.Gl) + 
                                       T.dot(right_current_level, self.Gr) + 
                                       self.Gb)
        left_gate, central_gate, right_gate = current_gates[:, 0], current_gates[:, 1], current_gates[:, 2]
        # Reshape for broadcasting
        left_gate = left_gate.dimshuffle(0, 'x')
        central_gate = central_gate.dimshuffle(0, 'x')
        right_gate = right_gate.dimshuffle(0, 'x')
        # Build next level of hidden representation using soft combination,
        # matrix of size (T-1)xd
        next_level = left_gate * left_current_level + \
                     right_gate * right_current_level + \
                     central_gate * central_current_level
        return T.set_subtensor(current_level[:nsteps-iter-1], next_level)

    def _step_prop_reduce(self, current_level):
        '''
        @current_level: Input matrix at current level. The first dimension corresponds to 
        the timestamp while the second dimension corresponds to the dimension of hidden representation

        Reduced version of level propagation, much more memory and time efficient implementation, but cannot
        be used inside theano.scan because theano.scan requires that the input and output through timestamps should
        have the same shape.
        '''
        # Build shifted matrix, due to the constraints of Theano.scan, we have to keep the shape of the
        # input and output matrix
        right_current_level = current_level[1:]
        left_current_level = current_level[:-1]
        # Compute temporary central hidden representation, of size Txd, but we only care about the first
        # T-1 rows, i.e., we only focus on the (T-1)xd sub-matrix.
        central_current_level = self.act.activate(T.dot(left_current_level, self.Wl) + 
                                                  T.dot(right_current_level, self.Wr) + 
                                                  self.Wb)
        # Compute gating function, of size Tx3. Again, due to the internal limitation of Theano.scan, we cannot
        # reduce the size of the matrix and have to keep the same size, but actually we only want the first (T-1)x3
        # sub-matrix.
        current_gates = T.nnet.softmax(T.dot(left_current_level, self.Gl) + 
                                       T.dot(right_current_level, self.Gr) + 
                                       self.Gb)
        left_gate, central_gate, right_gate = current_gates[:, 0], current_gates[:, 1], current_gates[:, 2]
        # Reshape for broadcasting
        left_gate = left_gate.dimshuffle(0, 'x')
        central_gate = central_gate.dimshuffle(0, 'x')
        right_gate = right_gate.dimshuffle(0, 'x')
        # Build next level of hidden representation using soft combination,
        # matrix of size (T-1)xd
        next_level = left_gate * left_current_level + \
                     right_gate * right_current_level + \
                     central_gate * central_current_level
        return next_level

    def encode(self, inputM):
        '''
        @input: Theano symbol matrix. Compress the input matrix into output vector.
        '''
        hidden = T.dot(inputM, self.U)
        # Length of the time sequence
        nsteps = inputM.shape[0]
        pyramids, _ = theano.scan(fn=self._step_prop, 
                                    sequences=T.arange(nsteps-1),
                                    non_sequences=nsteps,
                                    outputs_info=[hidden],
                                    n_steps=nsteps-1)
        output = pyramids[-1][0].dimshuffle('x', 0)
        return output

    def L2_loss(self):
        '''
        Return L2 norm of the model parameters.
        '''
        return T.sum(self.U ** 2) + T.sum(self.Wl ** 2) + T.sum(self.Wr ** 2) + \
               T.sum(self.Gl ** 2) + T.sum(self.Gr ** 2)
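
A NumPy sketch of one pyramid step, mirroring `_step_prop_reduce`: a level of T rows is merged pairwise into T-1 rows by a 3-way softmax gate over left, central, and right candidates; applying it T-1 times collapses the sequence to a single vector, which is what `compress` returns:

import numpy as np

def softmax(z):
    e = np.exp(z - z.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

def grcnn_step(level, Wl, Wr, Wb, Gl, Gr, Gb, act=np.tanh):
    # level: (T, d) -> (T-1, d); adjacent pairs merged by a gated combination.
    left, right = level[:-1], level[1:]                     # (T-1, d) each
    central = act(left @ Wl + right @ Wr + Wb)              # candidate merge
    gates = softmax(left @ Gl + right @ Gr + Gb)            # (T-1, 3)
    gl, gc, gr = gates[:, :1], gates[:, 1:2], gates[:, 2:]  # keep 2-D to broadcast
    return gl * left + gr * right + gc * central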
Example #28
class ExtGrCNNEncoder(object):
    '''
    An extension of the canonical GrCNN, with more than 1 gate at each local binary window.
    '''
    def __init__(self, config, verbose=True):
        '''
        @config: GrCNNConfiger. Configer used to set the architecture of ExtGrCNNEncoder.
        '''
        if verbose: logger.debug('Building Extended Gated Recursive Convolutional Neural Network Encoder...')
        # Scale factor for initializing model parameters
        self.scale = config.scale
        # Make theano symbolic tensor for input and model parameters
        self.input = T.matrix(name='ExtGrCNNEncoder input', dtype=floatX)
        # Configure activation function
        self.act = Activation(config.activation)
        fan_in, fan_out = config.num_input, config.num_hidden
        # Initialize model parameter
        np.random.seed(config.random_seed)
        # Projection matrix U
        U_val = np.random.uniform(low=-1.0, high=1.0, size=(fan_in, fan_out))
        U_val = U_val.astype(floatX)
        U_val *= self.scale
        self.U = theano.shared(value=U_val, name='U', borrow=True)
        self.hidden0 = T.dot(self.input, self.U)
        # 3rd-tensor to implement the multi-gate GrCNN Encoders, where the first dimension corresponds
        # to the number of gates
        Wl_vals = [np.random.uniform(low=-1.0, high=1.0, size=(fan_out, fan_out)).astype(floatX) for _ in xrange(config.num_gates)]
        Wl_vals = [np.linalg.svd(Wl_val)[0] for Wl_val in Wl_vals]
        Wl_vals = np.asarray(Wl_vals)
        self.Wl = theano.shared(value=Wl_vals, name='W_l', borrow=True)

        Wr_vals = [np.random.uniform(low=-1.0, high=1.0, size=(fan_out, fan_out)).astype(floatX) for _ in xrange(config.num_gates)]
        Wr_vals = [np.linalg.svd(Wr_val)[0] for Wr_val in Wr_vals]
        Wr_vals = np.asarray(Wr_vals)
        self.Wr = theano.shared(value=Wr_vals, name='W_r', borrow=True)

        self.Wb = theano.shared(value=np.zeros((config.num_gates, fan_out), dtype=floatX), name='W_b', borrow=True)
        # Multi-gate choosing functions
        Gl_vals = np.random.uniform(low=-1.0, high=1.0, size=(fan_out, config.num_gates+2)).astype(floatX)
        self.Gl = theano.shared(value=Gl_vals, name='G_l', borrow=True)

        Gr_vals = np.random.uniform(low=-1.0, high=1.0, size=(fan_out, config.num_gates+2)).astype(floatX)
        self.Gr = theano.shared(value=Gr_vals, name='G_r', borrow=True)

        self.Gb = theano.shared(value=np.zeros(config.num_gates+2, dtype=floatX), name='G_b', borrow=True)
        # Stack all the model parameters
        self.params = [self.U, self.Wl, self.Wr, self.Wb, self.Gl, self.Gr, self.Gb]
        self.num_params = fan_in * fan_out + 2 * config.num_gates * fan_out * fan_out + config.num_gates * fan_out + \
                          2 * (config.num_gates+2) * fan_out + config.num_gates + 2
        # Length of the time sequence
        self.nsteps = self.input.shape[0]
        # Building ExtGrCNNEncoder pyramids
        self.pyramids, _ = theano.scan(fn=self._step_prop, 
                                    sequences=T.arange(self.nsteps-1),
                                    non_sequences=self.nsteps,
                                    outputs_info=[self.hidden0],
                                    n_steps=self.nsteps-1)
        self.output = self.pyramids[-1][0].dimshuffle('x', 0)
        # Compression -- Encoding function
        self.compress = theano.function(inputs=[self.input], outputs=self.output)
        if verbose:
            logger.debug('Finished constructing the structure of ExtGrCNN Encoder: ')
            logger.debug('Size of the input dimension: %d' % fan_in)
            logger.debug('Size of the hidden dimension: %d' % fan_out)
            logger.debug('Number of gating functions: %d' % config.num_gates)
            logger.debug('Number of parameters in ExtGrCNN: %d' % self.num_params)
            logger.debug('Activation function: %s' % config.activation)

    def _step_prop(self, iter, current_level, nsteps):
        '''
        @iter: Index of the current level of the pyramid.
        @current_level: Input matrix at the current level. The first dimension corresponds to the time
        dimension and the second to the dimension of the hidden representation.
        @nsteps: Length of the original input sequence.
        '''
        # Build the shifted matrices. Due to the constraints of theano.scan, the input and output
        # matrices must keep the same fixed shape, of size T x d
        left_current_level = current_level[:nsteps-iter-1]
        right_current_level = current_level[1:nsteps-iter]
        # Compute the temporary central multi-representations, of size T x K x d, where T is the time
        # dimension, K is the number of gates and d is the dimension of the hidden representation
        multi_centrals = self.act.activate(T.dot(left_current_level, self.Wl) + 
                                           T.dot(right_current_level, self.Wr) + 
                                           self.Wb)
        # Compute the gating function, of size Tx(K+2)
        multi_gates = T.nnet.softmax(T.dot(left_current_level, self.Gl) + 
                                     T.dot(right_current_level, self.Gr) + 
                                     self.Gb)
        # Softmax-Gating combination
        multi_gates = multi_gates.dimshuffle(0, 1, 'x')
        next_level = multi_gates[:, 1:-1, :] * multi_centrals
        next_level = T.sum(next_level, axis=1)
        next_level += multi_gates[:, 0] * left_current_level + multi_gates[:, -1] * right_current_level
        return T.set_subtensor(current_level[:nsteps-iter-1], next_level)
 
    def encode(self, inputM):
        '''
        @inputM: Theano symbolic matrix. Compress the input matrix into an output vector. The first
                 dimension of inputM should correspond to the time dimension.
        '''
        hidden = T.dot(inputM, self.U)
        nsteps = inputM.shape[0]
        pyramids, _ = theano.scan(fn=self._step_prop, 
                                sequences=T.arange(nsteps-1),
                                non_sequences=nsteps, 
                                outputs_info=[hidden],
                                n_steps=nsteps-1)
        output = pyramids[-1][0].dimshuffle('x', 0)
        return output
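
theano.scan requires its recurrent output to keep a constant shape, which is why _step_prop writes the merged level back into a fixed (T, d) buffer with set_subtensor, and only the first nsteps - iter - 1 rows stay meaningful. A rough NumPy analogue of one multi-gate step follows; the names and the tanh activation are assumptions:

import numpy as np

def ext_step(level, it, Wl, Wr, Wb, Gl, Gr, Gb, nsteps):
    # level: fixed (T, d) buffer; the active prefix has nsteps - it rows.
    left = level[:nsteps - it - 1]           # (t, d)
    right = level[1:nsteps - it]             # (t, d)
    K = Wl.shape[0]
    # K candidate central representations: (t, K, d)
    centrals = np.tanh(np.einsum('td,kde->tke', left, Wl) +
                       np.einsum('td,kde->tke', right, Wr) + Wb)
    # softmax over the K + 2 choices: [left, K centrals..., right]
    logits = left @ Gl + right @ Gr + Gb     # (t, K+2)
    g = np.exp(logits - logits.max(1, keepdims=True))
    g /= g.sum(1, keepdims=True)
    merged = (g[:, 0:1] * left + g[:, -1:] * right +
              (g[:, 1:-1, None] * centrals).sum(axis=1))
    out = level.copy()
    out[:nsteps - it - 1] = merged           # set_subtensor analogue
    return out

rng = np.random.RandomState(0)
T_, d, K = 5, 3, 2
level = rng.randn(T_, d)
out = ext_step(level, 0, rng.randn(K, d, d), rng.randn(K, d, d),
               np.zeros((K, d)), rng.randn(d, K + 2), rng.randn(d, K + 2),
               np.zeros(K + 2), T_)
print(out.shape)  # (5, 3): same buffer shape, first 4 rows updated
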
Example #29
	def __init__(self, config, verbose=True):
		if verbose: logger.debug('Building Bidirectional RNN Encoder...')
		self.input = T.matrix(name='BRNNEncoder_input')
		# Configure Activation function
		self.act = Activation(config.activation)
		# Build Bidirectional RNN
		num_input, num_hidden = config.num_input, config.num_hidden
		self.num_params = 2 * (num_input * num_hidden + num_hidden * num_hidden + num_hidden)
		# Initialize model parameters
		np.random.seed(config.random_seed)
		# 1, Feed-forward matrix for forward direction: W_forward
		W_forward_val = np.random.uniform(low=-1.0, high=1.0, size=(num_input, num_hidden))
		W_forward_val = W_forward_val.astype(floatX)
		self.W_forward = theano.shared(value=W_forward_val, name='W_forward', borrow=True)
		# 1, Feed-forward matrix for backward direction: W_backward
		W_backward_val = np.random.uniform(low=-1.0, high=1.0, size=(num_input, num_hidden))
		W_backward_val = W_backward_val.astype(floatX)
		self.W_backward = theano.shared(value=W_backward_val, name='W_backward', borrow=True)
		# 2, Recurrent matrix for forward direction: U_forward
		U_forward_val = np.random.uniform(low=-1.0, high=1.0, size=(num_hidden, num_hidden))
		U_forward_val = U_forward_val.astype(floatX)
		U_forward_val, _, _ = np.linalg.svd(U_forward_val)
		self.U_forward = theano.shared(value=U_forward_val, name='U_forward', borrow=True)
		# 2, Recurrent matrix for backward direction: U_backward
		U_backward_val = np.random.uniform(low=-1.0, high=1.0, size=(num_hidden, num_hidden))
		U_backward_val = U_backward_val.astype(floatX)
		U_backward_val, _, _ = np.linalg.svd(U_backward_val)
		self.U_backward = theano.shared(value=U_backward_val, name='U_backward', borrow=True)
		# 3, Bias parameter for the hidden-layer forward direction RNN
		b_forward_val = np.zeros(num_hidden, dtype=floatX)
		self.b_forward = theano.shared(value=b_forward_val, name='b_forward', borrow=True)
		# 3, Bias parameter for the hidden-layer backward direction RNN
		b_backward_val = np.zeros(num_hidden, dtype=floatX)
		self.b_backward = theano.shared(value=b_backward_val, name='b_backward', borrow=True)
		# h[0], zero vectors, treated as constants
		self.h0_forward = theano.shared(value=np.zeros(num_hidden, dtype=floatX), name='h0_forward', borrow=True)
		self.h0_backward = theano.shared(value=np.zeros(num_hidden, dtype=floatX), name='h0_backward', borrow=True)
		# Stack all the parameters
		self.params = [self.W_forward, self.W_backward, self.U_forward, self.U_backward, 
					   self.b_forward, self.b_backward]
		# Compute the forward and backward representation over time
		self.h_forwards, _ = theano.scan(fn=self._forward_step, 
										 sequences=self.input, 
										 outputs_info=[self.h0_forward],
										 truncate_gradient=config.bptt)
		self.h_backwards, _ = theano.scan(fn=self._backward_step,
										  sequences=self.input,
										  outputs_info=[self.h0_backward],
										  truncate_gradient=config.bptt,
										  go_backwards=True)
		# Average compressing
		self.h_forward = T.mean(self.h_forwards, axis=0)
		self.h_backward = T.mean(self.h_backwards, axis=0)
		# Concatenate
		self.output = T.concatenate([self.h_forward, self.h_backward], axis=0)
		# L1, L2 regularization
		self.L1_norm = T.sum(T.abs_(self.W_forward) + T.abs_(self.W_backward) + 
							 T.abs_(self.U_forward) + T.abs_(self.U_backward))
		self.L2_norm = T.sum(self.W_forward ** 2) + T.sum(self.W_backward ** 2) + \
					   T.sum(self.U_forward ** 2) + T.sum(self.U_backward ** 2)
		if verbose:
			logger.debug('Finished constructing the structure of BRNN Encoder: ')
			logger.debug('Size of the input dimension: %d' % num_input)
			logger.debug('Size of the hidden dimension: %d' % num_hidden)
			logger.debug('Activation function: %s' % config.activation)
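
The encoder's output is just the time-averaged forward hidden state concatenated with the time-averaged backward hidden state, giving a 2 * num_hidden encoding. A plain-NumPy restatement, for illustration only (tanh assumed as the activation):

import numpy as np

def brnn_encode(X, Wf, Uf, bf, Wb, Ub, bb):
    # X: (T, num_input); returns a (2 * num_hidden,) encoding.
    h = Wf.shape[1]
    hf, hb = np.zeros(h), np.zeros(h)
    fwd, bwd = [], []
    for t in range(X.shape[0]):              # forward direction
        hf = np.tanh(X[t] @ Wf + hf @ Uf + bf)
        fwd.append(hf)
    for t in reversed(range(X.shape[0])):    # backward direction
        hb = np.tanh(X[t] @ Wb + hb @ Ub + bb)
        bwd.append(hb)
    # average over time, then concatenate the two directions
    return np.concatenate([np.mean(fwd, axis=0), np.mean(bwd, axis=0)])

rng = np.random.RandomState(0)
X = rng.randn(6, 3)
enc = brnn_encode(X, rng.randn(3, 4), rng.randn(4, 4), np.zeros(4),
                  rng.randn(3, 4), rng.randn(4, 4), np.zeros(4))
print(enc.shape)  # (8,)
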
Example #30
    def __init__(self, configs=None, verbose=True):
        '''
        @configs: CNNConfiger. Configer used to set the architecture of the CNN.
        '''
        if verbose: pprint("Building Convolutional Neural Network...")
        # Make theano symbolic tensor for input and ground truth label
        self.input = T.tensor4(name='input', dtype=floatX)
        self.truth = T.ivector(name='label')
        self.learn_rate = T.scalar(name='learn rate')
        self.batch_size = configs.batch_size
        self.image_row = configs.image_row
        self.image_col = configs.image_col
        # There may be multiple convolution-pooling layers and multilayer perceptrons.
        self.convpool_layers = []
        self.hidden_layers = []
        self.softmax_layers = []
        # Configure activation function
        self.act = Activation(configs.activation)
        # Configuration should be valid
        assert configs.num_convpool == len(configs.convs)
        assert configs.num_convpool == len(configs.pools)
        assert configs.num_hidden == len(configs.hiddens)
        assert configs.num_softmax == len(configs.softmaxs)
        # Construct random number generator
        srng = T.shared_randomstreams.RandomStreams(configs.random_seed)
        # Build architecture of CNN
        # Convolution and Pooling layers
        image_shapes, filter_shapes = [], []
        for i in xrange(configs.num_convpool):
            if i == 0:
                image_shapes.append(
                    (self.batch_size, 1, self.image_row, self.image_col))
                filter_shapes.append(
                    (configs.convs[i][0], 1, configs.convs[i][1],
                     configs.convs[i][2]))
            else:
                image_shapes.append(
                    (self.batch_size, configs.convs[i - 1][0],
                     (image_shapes[i - 1][2] - configs.convs[i - 1][1] + 1) /
                     configs.pools[i - 1][0],
                     (image_shapes[i - 1][3] - configs.convs[i - 1][2] + 1) /
                     configs.pools[i - 1][1]))
                filter_shapes.append(
                    (configs.convs[i][0], configs.convs[i - 1][0],
                     configs.convs[i][1], configs.convs[i][2]))
        for i in xrange(configs.num_convpool):
            if i == 0:
                current_input = self.input
            else:
                current_input = self.convpool_layers[i - 1].output
            self.convpool_layers.append(
                LeNetConvPoolLayer(input=current_input,
                                   filter_shape=filter_shapes[i],
                                   image_shape=image_shapes[i],
                                   poolsize=configs.pools[i],
                                   act=self.act))
        # Multilayer perceptron layers
        for i in xrange(configs.num_hidden):
            if i == 0:
                current_input = T.flatten(
                    self.convpool_layers[configs.num_convpool - 1].output, 2)
            else:
                current_input = self.hidden_layers[i - 1].output
            # Adding dropout to hidden layers
            hidden_layer = HiddenLayer(current_input,
                                       configs.hiddens[i],
                                       act=self.act)
            mask = srng.binomial(n=1,
                                 p=1 - configs.dropout,
                                 size=hidden_layer.output.shape)
            hidden_layer.output *= T.cast(mask, floatX)
            self.hidden_layers.append(hidden_layer)
        # Softmax layers; in most cases the architecture will only contain one softmax layer
        for i in xrange(configs.num_softmax):
            if i == 0:
                current_input = self.hidden_layers[configs.num_hidden -
                                                   1].output
            else:
                current_input = self.softmax_layers[i - 1].output
            self.softmax_layers.append(
                SoftmaxLayer(current_input, configs.softmaxs[i]))
        # Output
        self.pred = self.softmax_layers[configs.num_softmax - 1].prediction()
        # Cost function with ground truth provided
        self.cost = self.softmax_layers[configs.num_softmax - 1].NLL_loss(
            self.truth)
        # Build cost function
        # Stack all the parameters
        self.params = []
        for convpool_layer in self.convpool_layers:
            self.params.extend(convpool_layer.params)
        for hidden_layer in self.hidden_layers:
            self.params.extend(hidden_layer.params)
        for softmax_layer in self.softmax_layers:
            self.params.extend(softmax_layer.params)
        # Compute gradient of self.cost with respect to network parameters
        self.gradparams = T.grad(self.cost, self.params)
        # Stochastic gradient descent learning algorithm
        self.updates = []
        for param, gradparam in zip(self.params, self.gradparams):
            self.updates.append((param, param - self.learn_rate * gradparam))
        # Build objective function
        self.objective = theano.function(
            inputs=[self.input, self.truth, self.learn_rate],
            outputs=self.cost,
            updates=self.updates)
        # Build prediction function
        self.predict = theano.function(inputs=[self.input], outputs=self.pred)
        if verbose:
            pprint('Architecture building finished, summarized as below: ')
            pprint(
                'There are %d layers (not including the input layer) altogether: '
                % (configs.num_convpool * 2 + configs.num_hidden +
                   configs.num_softmax))
            pprint('%d convolution layers + %d maxpooling layers.' %
                   (len(self.convpool_layers), len(self.convpool_layers)))
            pprint('%d hidden layers.' % (len(self.hidden_layers)))
            pprint('%d softmax layers.' % (len(self.softmax_layers)))
            pprint('=' * 50)
            pprint('Detailed architecture of each layer: ')
            pprint('-' * 50)
            pprint('Convolution and Pooling layers: ')
            for i in xrange(len(self.convpool_layers)):
                pprint('Convolution Layer %d: ' % i)
                pprint(
                    '%d feature maps, each has a filter kernel with size (%d, %d)'
                    % (configs.convs[i][0], configs.convs[i][1],
                       configs.convs[i][2]))
            pprint('-' * 50)
            pprint('Hidden layers: ')
            for i in xrange(len(self.hidden_layers)):
                pprint('Hidden Layer %d: ' % i)
                pprint('Input dimension: %d, Output dimension: %d' %
                       (configs.hiddens[i][0], configs.hiddens[i][1]))
            pprint('-' * 50)
            pprint('Softmax layers: ')
            for i in xrange(len(self.softmax_layers)):
                pprint('Softmax Layer %d: ' % i)
                pprint('Input dimension: %d, Output dimension: %d' %
                       (configs.softmaxs[i][0], configs.softmaxs[i][1]))
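
The updates wired into self.objective implement plain SGD: each call performs one step of param := param - learn_rate * grad on every parameter. A hypothetical driver loop under that assumption (the cnn, train_x, and train_y names are placeholders, not part of the class):

import numpy as np

def train_sgd(cnn, train_x, train_y, learn_rate=0.01, n_epochs=10):
    # train_x: (N, 1, image_row, image_col) float array; train_y: (N,) int32 labels
    batch_size = cnn.batch_size
    n_batches = len(train_y) // batch_size
    for epoch in range(n_epochs):
        costs = []
        for i in range(n_batches):
            lo, hi = i * batch_size, (i + 1) * batch_size
            # one SGD step; returns the mini-batch cost
            costs.append(cnn.objective(train_x[lo:hi], train_y[lo:hi], learn_rate))
        print('epoch %d, mean cost %.4f' % (epoch, np.mean(costs)))
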
Example #31
class NNWordBasedAttention(WordBasedAttention):
    """
    Neural Machine Translation By Jointly Learning To Align and Translate
    Dzmitry Bahdanau, KyungHyun Cho, and Yoshua Bengio
    In Proceedings of ICLR 2015
    http://arxiv.org/abs/1409.0473v3
    """
    def __init__(self,
                 word_dim,
                 seq_dim,
                 hidden_dim,
                 activation='tanh',
                 initializer=default_initializer):
        super(NNWordBasedAttention,
              self).__init__(word_dim=word_dim,
                             seq_dim=seq_dim,
                             initializer=initializer)
        # (dim, dim)
        self.hidden_dim = hidden_dim
        self.W = shared_rand_matrix((self.word_dim, self.hidden_dim),
                                    'Attention_W', initializer)
        self.U = shared_rand_matrix((self.seq_dim, self.hidden_dim),
                                    'Attention_U', initializer)
        self.v = shared_rand_matrix((self.hidden_dim, ), 'Attention_v',
                                    initializer)
        self.act = Activation(activation)
        self.params = [self.W, self.U, self.v]
        self.norm_params = [self.W]

    def score(self, word, sequence):
        """
        :param word: (word_dim, )
        :param sequence: (length, seq_dim)
        :return: score: (length, )
        """
        # (word_dim, ) dot (word_dim, hidden_dim) -> (hidden_dim, )
        hidden1 = T.dot(word, self.W)
        # (length, seq_dim) dot (seq_dim, hidden_dim) -> (length, hidden_dim)
        hidden2 = T.dot(sequence, self.U)
        # (hidden_dim, ) + (length, hidden_dim) -> (length, hidden_dim)
        hidden = hidden1[None, :] + hidden2
        # (length, hidden_dim) -> (length, hidden_dim)
        act_hidden = self.act.activate(hidden)
        # (length, hidden_dim) dot (hidden_dim, ) -> (length, )
        score = T.dot(act_hidden, self.v)
        return score

    def score_batch(self, word, sequence):
        """
        :param word: (batch, word_dim)
        :param sequence: (batch, length, seq_dim)
        :return: score: (batch, length, )
        """
        # (batch, word_dim) dot (word_dim, hidden_dim) -> (batch, hidden_dim)
        hidden1 = T.dot(word, self.W)
        # (batch, length, seq_dim) dot (seq_dim, hidden_dim) -> (batch, length, hidden_dim)
        hidden2 = T.dot(sequence, self.U)
        # (batch, length, hidden_dim) + (batch, hidden_dim) -> (batch, length, hidden_dim)
        hidden = hidden1[:, None, :] + hidden2
        # (batch, length, hidden_dim) -> (batch, length, hidden_dim)
        act_hidden = self.act.activate(hidden)
        # (batch, length, hidden_dim) dot (hidden_dim, ) -> (batch, length, )
        score = T.dot(act_hidden, self.v)
        return score
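
Both score methods compute the standard additive (Bahdanau) score v^T act(W w + U s_i) for every position i of the sequence. A NumPy sketch with made-up dimensions that verifies the shapes:

import numpy as np

word_dim, seq_dim, hidden_dim, length = 3, 4, 5, 7
rng = np.random.RandomState(1)
W = rng.randn(word_dim, hidden_dim)
U = rng.randn(seq_dim, hidden_dim)
v = rng.randn(hidden_dim)
word = rng.randn(word_dim)
seq = rng.randn(length, seq_dim)

# (hidden,) broadcasts against (length, hidden); final dot gives (length,)
scores = np.tanh(word @ W + seq @ U) @ v
weights = np.exp(scores) / np.exp(scores).sum()  # attention distribution
print(scores.shape, weights.sum())               # (7,) 1.0
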
Example #32
class BRNN(object):
	'''
	Bidirectional RNN. This is just a trial for using 
	BRNN as a tool for sentence modeling.

	First trial on the task of sentiment analysis.
	'''
	def __init__(self, configs, verbose=True):
		if verbose: pprint('Building Tied-weights Bidirectional Recurrent Neural Network...')
		self.input = T.matrix(name='input')
		self.truth = T.ivector(name='label')
		self.learn_rate = T.scalar(name='learn rate')
		# Configure Activation function
		self.act = Activation(configs.activation)
		# Build bidirectional RNN with tied weights
		num_input, num_hidden, num_class = configs.num_input, configs.num_hidden, configs.num_class
		# Stack all the variables together into a vector in order to apply the batch updating algorithm
		# Since there are two directions for the RNN, all the weight matrix associated with RNN will be 
		# duplicated
		num_params = 2 * (num_input * num_hidden + \
					 num_hidden * num_hidden + \
					 num_hidden) + \
					 2 * num_hidden * num_class + \
					 num_class
		self.num_params = num_params
		self.theta = theano.shared(value=np.zeros(num_params, dtype=floatX), name='theta', borrow=True)
		# Incremental index
		param_idx = 0
		# 1, Feed-forward matrix for forward direction: W_forward
		self.W_forward = self.theta[param_idx: param_idx+num_input*num_hidden].reshape((num_input, num_hidden))
		self.W_forward.name = 'W_forward_RNN'
		W_forward_init = np.asarray(np.random.uniform(low=-np.sqrt(6.0/(num_input+num_hidden)),
									  		  		  high=np.sqrt(6.0/(num_input+num_hidden)),
									  		  		  size=(num_input, num_hidden)), dtype=floatX)
		param_idx += num_input * num_hidden
		# 1, Feed-forward matrix for backward direction: W_backward
		self.W_backward = self.theta[param_idx: param_idx+num_input*num_hidden].reshape((num_input, num_hidden))
		self.W_backward.name = 'W_backward_RNN'
		W_backward_init = np.asarray(np.random.uniform(low=-np.sqrt(6.0/(num_input+num_hidden)),
													   high=np.sqrt(6.0/(num_input+num_hidden)),
													   size=(num_input, num_hidden)), dtype=floatX)
		param_idx += num_input * num_hidden
		# 2, Recurrent matrix for forward direction: U_forward
		self.U_forward = self.theta[param_idx: param_idx+num_hidden*num_hidden].reshape((num_hidden, num_hidden))
		self.U_forward.name = 'U_forward_RNN'
		U_forward_init = np.asarray(np.random.uniform(low=-np.sqrt(6.0/(num_hidden+num_hidden)),
													  high=np.sqrt(6.0/(num_hidden+num_hidden)),
													  size=(num_hidden, num_hidden)), dtype=floatX)
		param_idx += num_hidden * num_hidden
		# 2, Recurrent matrix for backward direction: U_backward
		self.U_backward = self.theta[param_idx: param_idx+num_hidden*num_hidden].reshape((num_hidden, num_hidden))
		self.U_backward.name = 'U_backward_RNN'
		U_backward_init = np.asarray(np.random.uniform(low=-np.sqrt(6.0/(num_hidden+num_hidden)),
													   high=np.sqrt(6.0/(num_hidden+num_hidden)),
													   size=(num_hidden, num_hidden)), dtype=floatX)
		param_idx += num_hidden * num_hidden
		# 3, Bias parameter for the hidden-layer forward direction RNN
		self.b_forward = self.theta[param_idx: param_idx+num_hidden]
		self.b_forward.name = 'b_forward_RNN'
		b_forward_init = np.zeros(num_hidden, dtype=floatX)		
		param_idx += num_hidden
		# 3, Bias parameter for the hidden-layer backward direction RNN
		self.b_backward = self.theta[param_idx: param_idx+num_hidden]
		self.b_backward.name = 'b_backward_RNN'
		b_backward_init = np.zeros(num_hidden, dtype=floatX)
		param_idx += num_hidden
		# Weight matrix for softmax function
		self.W_softmax = self.theta[param_idx: param_idx+2*num_hidden*num_class].reshape((2*num_hidden, num_class))
		self.W_softmax.name = 'W_softmax'
		W_softmax_init = np.asarray(np.random.uniform(low=-np.sqrt(6.0/(2*num_hidden+num_class)), 
													  high=np.sqrt(6.0/(2*num_hidden+num_class)),
													  size=(2*num_hidden, num_class)), dtype=floatX)
		param_idx += 2*num_hidden*num_class
		# Bias vector for softmax function
		self.b_softmax = self.theta[param_idx: param_idx+num_class]
		self.b_softmax.name = 'b_softmax'
		b_softmax_init = np.zeros(num_class, dtype=floatX)
		param_idx += num_class
		# Set all the default parameters into theta
		self.theta.set_value(np.concatenate([x.ravel() for x in 
			(W_forward_init, W_backward_init, U_forward_init, U_backward_init, 
			 b_forward_init, b_backward_init, W_softmax_init, b_softmax_init)]))
		assert param_idx == num_params
		# h[0], zero vector, treated as constants
		self.h_start = theano.shared(value=np.zeros(num_hidden, dtype=floatX), name='h_start', borrow=True)
		self.h_end = theano.shared(value=np.zeros(num_hidden, dtype=floatX), name='h_end', borrow=True)
		# recurrent function used to compress a sequence of input vectors
		# the first dimension should correspond to time
		def forward_step(x_t, h_tm1):
			h_t = self.act.activate(T.dot(x_t, self.W_forward) + \
									T.dot(h_tm1, self.U_forward) + self.b_forward)
			return h_t
		def backward_step(x_t, h_tm1):
			h_t = self.act.activate(T.dot(x_t, self.W_backward) + \
									T.dot(h_tm1, self.U_backward) + self.b_backward)
			return h_t
		# Forward and backward representation over time
		self.forward_h, _ = theano.scan(fn=forward_step, sequences=self.input, outputs_info=[self.h_start],
										truncate_gradient=configs.bptt)
		self.backward_h, _ = theano.scan(fn=backward_step, sequences=self.input, outputs_info=[self.h_end], 
										 truncate_gradient=configs.bptt, go_backwards=True)
		# Store the final value
		# self.h_start_star = self.forward_h[-1]
		# self.h_end_star = self.backward_h[-1]
		self.h_start_star = T.mean(self.forward_h, axis=0)
		self.h_end_star = T.mean(self.backward_h, axis=0)
		# L1, L2 regularization
		self.L1_norm = T.sum(T.abs_(self.W_forward) + T.abs_(self.W_backward) + \
							 T.abs_(self.U_forward) + T.abs_(self.U_backward) + \
							 T.abs_(self.W_softmax))
		self.L2_norm = T.sum(self.W_forward ** 2) + T.sum(self.W_backward ** 2) + \
					   T.sum(self.U_forward ** 2) + T.sum(self.U_backward ** 2) + \
					   T.sum(self.W_softmax ** 2)
		# Build function to show the learned representation for different sentences
		self.show_forward = theano.function(inputs=[self.input], outputs=self.h_start_star)
		self.show_backward = theano.function(inputs=[self.input], outputs=self.h_end_star)
		##################################################################################
		# Correlated BRNN
		##################################################################################
		# Concatenate these two vectors into one
		self.h = T.concatenate([self.h_start_star, self.h_end_star], axis=0)
		# Dropout parameter
		srng = T.shared_randomstreams.RandomStreams(configs.random_seed)
		mask = srng.binomial(n=1, p=1-configs.dropout, size=self.h.shape)
		self.h *= T.cast(mask, floatX)
		# Use concatenated vector as input to the Softmax/MLP classifier
		self.output = T.nnet.softmax(T.dot(self.h, self.W_softmax) + self.b_softmax)		
		self.pred = T.argmax(self.output, axis=1)
		# Build cost function
		self.cost = -T.mean(T.log(self.output)[T.arange(self.truth.shape[0]), self.truth])
		if configs.regularization:
			self.cost += configs.lambda1 * self.L2_norm
		# Compute gradient
		self.gradtheta = T.grad(self.cost, self.theta)
		self.gradinput = T.grad(self.cost, self.input)
		# Build objective function
		# Compute the gradients to parameters
		self.compute_cost_and_gradient = theano.function(inputs=[self.input, self.truth], 
												outputs=[self.cost, self.gradtheta])
		# Compute the gradients to inputs
		self.compute_input_gradient = theano.function(inputs=[self.input, self.truth],
												outputs=self.gradinput)
		# Build prediction function
		self.predict = theano.function(inputs=[self.input], outputs=self.pred)
		if verbose:
			pprint('*' * 50)
			pprint('Finished constructing Bidirectional Recurrent Neural Network (BRNN)')
			pprint('Size of input dimension: %d' % configs.num_input)
			pprint('Size of hidden/recurrent dimension: %d' % configs.num_hidden)
			pprint('Size of output dimension: %d' % configs.num_class)
			pprint('Is regularization applied? %s' % ('yes' if configs.regularization else 'no'))
			if configs.regularization:
				pprint('Coefficient of regularization term: %f' % configs.lambda1)
			pprint('BPTT step: %d' % configs.bptt)
			pprint('Number of free parameters in BRNN: %d' % self.num_params)
			pprint('*' * 50)

	# This method is used to implement the batch updating algorithm
	def update_params(self, gradtheta, learn_rate):
		# gradparams is a single long vector which can be used to update self.theta
		# Learning algorithm: simple stochastic gradient descent
		theta = self.theta.get_value(borrow=True)
		self.theta.set_value(theta - learn_rate * gradtheta, borrow=True)

	@staticmethod
	def save(fname, model):
		with file(fname, 'wb') as fout:
			cPickle.dump(model, fout)

	@staticmethod
	def load(fname):
		with file(fname, 'rb') as fin:
			return cPickle.load(fin)
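
Because every parameter lives in the flat vector theta, batch training reduces to averaging the gradient vectors returned by compute_cost_and_gradient and calling update_params once. A hypothetical helper under that assumption (sentences and labels are placeholder inputs):

import numpy as np

def train_batch(brnn, sentences, labels, learn_rate=0.05):
    # sentences[i]: (T_i, num_input) matrix; labels[i]: int array of length 1
    grads, costs = [], []
    for sent, lab in zip(sentences, labels):
        cost, grad = brnn.compute_cost_and_gradient(sent, lab)
        costs.append(cost)
        grads.append(grad)
    # one SGD step on the averaged flat gradient vector
    brnn.update_params(np.mean(grads, axis=0), learn_rate)
    return np.mean(costs)
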
Example #33
class TransEModel(EntityScorer):
    def __init__(self,
                 entity_dim,
                 relation_num,
                 activation='iden',
                 initializer=default_initializer,
                 prefix='',
                 verbose=True):
        super(TransEModel, self).__init__()
        self.entity_dim = entity_dim
        self.relation_num = relation_num
        # (relation_num, entity_dim)
        self.W = shared_rand_matrix((relation_num, self.entity_dim),
                                    prefix + 'TransE_R', initializer)
        self.act = Activation(activation)
        self.params = [self.W]
        self.norm_params = [self.W]
        self.l1_norm = T.sum(T.abs_(self.W))
        self.l2_norm = T.sum(self.W**2)

        if verbose:
            logger.debug(
                'Architecture of TransE Model built finished, summarized as below:'
            )
            logger.debug('Entity Dimension: %d' % self.entity_dim)
            logger.debug('Relation Number:  %d' % self.relation_num)
            logger.debug('Initializer:      %s' % initializer)
            logger.debug('Activation:       %s' % activation)

    def score(self, e1, e2, r_index):
        """
        :param e1: (entity_dim, )
        :param e2: (entity_dim, )
        :param r_index: scalar
        :return: 
        """
        # (entity_dim, ) + (entity_dim, ) - (entity_dim, ) -> (entity_dim, )
        hidden = e1 + self.W[r_index] - e2
        # (entity_dim, ) -> scalar
        d = T.sum(hidden**2)
        return self.act.activate(d)

    def score_batch(self, e1, e2, r_index):
        """
        :param e1: (batch, entity_dim, )
        :param e2: (batch, entity_dim, )
        :param r_index: (batch, )
        :return: 
        """
        # (batch, entity_dim, ) + (batch, entity_dim, ) - (batch, entity_dim, ) -> (batch, entity_dim, )
        hidden = e1 + self.W[r_index] - e2
        d = T.sum(hidden**2, axis=1)
        return self.act.activate(d)

    def score_one_relation(self, e1, e2, r_index):
        """
        :param e1: (batch, entity_dim, )
        :param e2: (batch, entity_dim, )
        :param r_index: scalar
        :return: 
        """
        # (batch, entity_dim, ) + (batch, entity_dim, ) - (batch, entity_dim, ) -> (batch, entity_dim, )
        hidden = e1 + self.W[r_index][None, :] - e2
        d = T.sum(hidden**2, axis=1)
        return self.act.activate(d)
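
TransE scores a triple (e1, r, e2) as the squared distance ||e1 + W_r - e2||^2 (passed through the activation, identity by default), so a lower score means the relation fits better. An equivalent NumPy sketch:

import numpy as np

def transe_score(e1, e2, r_vec):
    # squared L2 distance of the translated head from the tail
    diff = e1 + r_vec - e2
    return np.sum(diff ** 2)

e1, e2 = np.array([1.0, 0.0]), np.array([1.0, 1.0])
r_vec = np.array([0.0, 1.0])
print(transe_score(e1, e2, r_vec))  # 0.0 -> perfect fit
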
Example #34
class NeuralTensorModel(EntityScorer):
    def __init__(self,
                 entity_dim,
                 relation_num,
                 activation='tanh',
                 hidden=5,
                 keep_normal=False,
                 initializer=default_initializer,
                 prefix='',
                 verbose=True):
        super(NeuralTensorModel, self).__init__()
        self.entity_dim = entity_dim
        self.relation_num = relation_num
        self.hidden = hidden
        self.slice_seq = T.arange(hidden)
        self.keep_normal = keep_normal
        # (relation_num, entity_dim, entity_dim, hidden)
        self.W = shared_rand_matrix(
            (relation_num, self.entity_dim, self.entity_dim, self.hidden),
            prefix + 'NTN_W', initializer)
        # (relation_num, hidden)
        self.U = shared_ones_matrix((relation_num, self.hidden),
                                    name=prefix + 'NTN_U')
        if keep_normal:
            # (relation_num, 2 * entity_dim, hidden)
            self.V = shared_rand_matrix(
                (relation_num, self.entity_dim * 2, self.hidden),
                prefix + 'NTN_V', initializer)
            # (relation_num, hidden)
            self.b = shared_zero_matrix((relation_num, self.hidden),
                                        name=prefix + 'NTN_B')
            self.params = [self.W, self.V, self.U, self.b]
            self.norm_params = [self.W, self.V, self.U, self.b]
        else:
            self.params = [self.W]
            self.norm_params = [self.W]
        self.act = Activation(activation)
        self.l1_norm = T.sum(
            [T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = T.sum([T.sum(param**2) for param in self.norm_params])

        if verbose:
            logger.debug(
                'Architecture of Tensor Model built finished, summarized as below:'
            )
            logger.debug('Entity Dimension: %d' % self.entity_dim)
            logger.debug('Hidden Dimension: %d' % self.hidden)
            logger.debug('Relation Number:  %d' % self.relation_num)
            logger.debug('Initializer:      %s' % initializer)
            logger.debug('Activation:       %s' % activation)

    @staticmethod
    def step(_slice, e1, e2, w):
        """
        :param _slice: scalar
        :param e1: (entity_dim, )
        :param e2: (entity_dim, )
        :param w : (entity_dim, entity_dim, hidden)
        :return: 
        """
        # (entity_dim, ) dot (entity_dim, entity_dim) dot (entity_dim, ) -> scalar
        return T.dot(e1, T.dot(w[:, :, _slice], e2))

    @staticmethod
    def step_relation(_slice, e1, e2, w):
        """
        :param _slice: scalar
        :param e1: (batch, entity_dim)
        :param e2: (batch, entity_dim)
        :param w : (entity_dim, entity_dim, hidden)
        :return: 
        """
        # (batch, entity_dim, ) dot (entity_dim, entity_dim) -> (batch, entity_dim)
        hidden = T.dot(e1, w[:, :, _slice])
        # (batch, entity_dim) dot (batch, entity_dim, ) -> (batch, )
        hidden = T.sum(hidden * e2, axis=1)
        return hidden

    @staticmethod
    def step_batch(_slice, e1, e2, w):
        """
        :param _slice: scalar
        :param e1: (batch, entity_dim)
        :param e2: (batch, entity_dim)
        :param w : (batch, entity_dim, entity_dim, hidden)
        :return: 
        """
        # (batch, entity_dim, ) dot (batch, entity_dim, entity_dim) -> (batch, entity_dim)
        hidden = T.batched_dot(e1, w[:, :, :, _slice])
        # (batch, entity_dim) dot (batch, entity_dim, ) -> (batch, )
        hidden = T.sum(hidden * e2, axis=1)
        return hidden

    def score(self, e1, e2, r_index):
        """
        :param e1: (entity_dim, )
        :param e2: (entity_dim, )
        :param r_index: scalar
        :return: 
        """
        # (entity_dim, ) dot (entity_dim, entity_dim, hidden) dot (entity_dim, ) -> (hidden, )
        hidden1_sep, _ = theano.scan(fn=self.step,
                                     sequences=[self.slice_seq],
                                     non_sequences=[e1, e2, self.W[r_index]],
                                     name='single_scan')
        hidden1 = T.concatenate([hidden1_sep])
        if self.keep_normal:
            # (2 * entity_dim, ) dot (2 * entity_dim, hidden) -> (hidden, )
            hidden2 = T.dot(T.concatenate([e1, e2]), self.V[r_index])
            # (hidden, ) + (hidden, ) + (hidden, ) -> (hidden, )
            hidden = hidden1 + hidden2 + self.b[r_index]
        else:
            hidden = hidden1
        # (hidden, ) -> (hidden, )
        act_hidden = self.act.activate(hidden)
        # (hidden, ) dot (hidden, ) -> scalar
        return T.dot(act_hidden, self.U[r_index])

    def score_batch(self, e1, e2, r_index):
        """
        :param e1: (batch, entity_dim, )
        :param e2: (batch, entity_dim, )
        :param r_index: (batch, )
        :return: 
        """
        # (batch, entity_dim) dot (batch, entity_dim, entity_dim, hidden) dot (batch, entity_dim) -> hidden * (batch, )
        hidden1_sep, _ = theano.scan(fn=self.step_batch,
                                     sequences=[self.slice_seq],
                                     non_sequences=[e1, e2, self.W[r_index]],
                                     name='batch_scan')
        # hidden * (batch, ) -> (batch, hidden)
        hidden1 = T.concatenate([hidden1_sep], axis=1).transpose()
        if self.keep_normal:
            # (batch, 2 * entity_dim) dot (batch, 2 * entity_dim, hidden) -> (batch, hidden, )
            hidden2 = T.batched_dot(T.concatenate([e1, e2], axis=1),
                                    self.V[r_index])
            # (batch, hidden) + (batch, hidden) + (batch, hidden) -> (batch, hidden)
            hidden = hidden1 + hidden2 + self.b[r_index]
        else:
            hidden = hidden1
        # (batch, hidden) -> (batch, hidden)
        act_hidden = self.act.activate(hidden)
        # (batch, hidden) dot (batch, hidden) -> (batch, )
        return T.sum(act_hidden * self.U[r_index], axis=1)

    def score_one_relation(self, e1, e2, r_index):
        """
        :param e1: (batch, entity_dim, )
        :param e2: (batch, entity_dim, )
        :param r_index: scalar
        :return: 
        """
        # (batch, entity_dim) dot (entity_dim, entity_dim, hidden) dot (batch, entity_dim) -> hidden * (batch, )
        hidden1_sep, _ = theano.scan(fn=self.step_relation,
                                     sequences=self.slice_seq,
                                     non_sequences=[e1, e2, self.W[r_index]],
                                     name='relation_scan')
        # hidden * (batch, ) -> (batch, hidden)
        hidden1 = T.concatenate([hidden1_sep], axis=1).transpose()
        if self.keep_normal:
            # (batch, 2 * entity_dim) dot (2 * entity_dim, hidden) -> (batch, hidden)
            hidden2 = T.dot(T.concatenate([e1, e2], axis=1), self.V[r_index])
            # (batch, hidden) + (batch, hidden) + (hidden) -> (batch, hidden)
            hidden = hidden1 + hidden2 + self.b[r_index][None, :]
        else:
            hidden = hidden1
        # (batch, hidden) -> (batch, hidden)
        act_hidden = self.act.activate(hidden)
        # (batch, hidden) dot (batch, hidden) -> (batch, )
        return T.sum(act_hidden * self.U[r_index], axis=1)
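
The scan over slice_seq evaluates the bilinear form e1^T W[:, :, k] e2 once per tensor slice; with einsum the whole term collapses into a single call. A NumPy sketch for the single-pair case, confirming the two forms agree:

import numpy as np

entity_dim, hidden = 4, 5
rng = np.random.RandomState(2)
W = rng.randn(entity_dim, entity_dim, hidden)   # one relation's tensor
e1, e2 = rng.randn(entity_dim), rng.randn(entity_dim)

# loop form, mirroring the scan over slices
loop = np.array([e1 @ W[:, :, k] @ e2 for k in range(hidden)])
# single einsum: sum_i sum_j e1_i W_ijk e2_j -> (hidden,)
vec = np.einsum('i,ijk,j->k', e1, W, e2)
print(np.allclose(loop, vec))  # True
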
Example #35
class SingleLayerModel(EntityScorer):
    def __init__(self,
                 entity_dim,
                 relation_num,
                 hidden=50,
                 activation='tanh',
                 initializer=default_initializer,
                 prefix='',
                 verbose=True):
        super(SingleLayerModel, self).__init__()
        self.hidden = hidden
        self.entity_dim = entity_dim
        self.relation_num = relation_num
        # (relation_num, k, entity_dim)
        self.W_1 = shared_rand_matrix(
            (relation_num, self.hidden, self.entity_dim),
            prefix + 'SingleLayer_W1', initializer)
        # (relation_num, k, entity_dim)
        self.W_2 = shared_rand_matrix(
            (relation_num, self.hidden, self.entity_dim),
            prefix + 'SingleLayer_W2', initializer)
        # (relation_num, k, )
        self.u = shared_ones_matrix((relation_num, self.hidden),
                                    prefix + 'SingleLayer_u')
        self.act = Activation(activation)
        self.params = [self.W_1, self.W_2, self.u]
        self.norm_params = [self.W_1, self.W_2, self.u]
        self.l1_norm = T.sum(T.abs_(self.W_1)) + T.sum(T.abs_(self.W_2)) + \
            T.sum(T.abs_(self.u))
        self.l2_norm = T.sum(self.W_1 ** 2) + T.sum(self.W_2 ** 2) + T.sum(self.u ** 2)

        if verbose:
            logger.debug(
                'Architecture of Single Layer Model built finished, summarized as below:'
            )
            logger.debug('Entity Dimension: %d' % self.entity_dim)
            logger.debug('Hidden Dimension: %d' % self.hidden)
            logger.debug('Relation Number:  %d' % self.relation_num)
            logger.debug('Initializer:      %s' % initializer)
            logger.debug('Activation:       %s' % activation)

    def score(self, e1, e2, r_index):
        """
        :param e1: (entity_dim, )
        :param e2: (entity_dim, )
        :param r_index: scalar
        :return: 
        """
        # (hidden, entity_dim) dot (entity_dim) + (hidden, entity_dim) dot (entity_dim) -> (hidden, )
        hidden = T.dot(self.W_1[r_index], e1) + T.dot(self.W_2[r_index], e2)
        # (hidden, ) -> (hidden, )
        act_hidden = self.act.activate(hidden)
        # (hidden, ) dot (hidden, ) -> scalar
        return T.dot(self.u[r_index], act_hidden)

    def score_batch(self, e1, e2, r_index):
        """
        :param e1: (batch, entity_dim, )
        :param e2: (batch, entity_dim, )
        :param r_index: (batch, )
        :return: 
        """
        # (batch, hidden, entity_dim) dot (batch, entity_dim) + (batch, hidden, entity_dim) dot (batch, entity_dim)
        hidden = T.batched_dot(self.W_1[r_index], e1)
        hidden += T.batched_dot(self.W_2[r_index], e2)
        # (batch, hidden) -> (batch, hidden)
        act_hidden = self.act.activate(hidden)
        # (batch, hidden) dot (batch, hidden) -> (batch, )
        return T.sum(act_hidden * self.u[r_index], axis=1)

    def score_one_relation(self, e1, e2, r_index):
        """
        :param e1: (batch, entity_dim, )
        :param e2: (batch, entity_dim, )
        :param r_index: scalar
        :return: 
        """
        # (batch, entity_dim) dot (entity_dim, hidden) + (batch, entity_dim) dot (entity_dim, hidden) -> (batch, hidden)
        hidden = T.dot(e1, self.W_1[r_index].transpose()) + T.dot(
            e2, self.W_2[r_index].transpose())
        # (batch, hidden) -> (batch, hidden)
        act_hidden = self.act.activate(hidden)
        # (batch, hidden) dot (hidden, ) -> (batch, )
        return T.dot(act_hidden, self.u[r_index])
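
The single-layer score is u_r^T f(W1_r e1 + W2_r e2); for a fixed relation, batching over entity pairs is just two matrix products. A NumPy sketch with assumed dimensions:

import numpy as np

entity_dim, hidden, batch = 4, 6, 3
rng = np.random.RandomState(3)
W1, W2 = rng.randn(hidden, entity_dim), rng.randn(hidden, entity_dim)
u = np.ones(hidden)
E1, E2 = rng.randn(batch, entity_dim), rng.randn(batch, entity_dim)

# (batch, entity_dim) @ (entity_dim, hidden) -> (batch, hidden), then dot with u
scores = np.tanh(E1 @ W1.T + E2 @ W2.T) @ u   # (batch,)
print(scores.shape)  # (3,)
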
Example #36
    def __init__(self, config=None, verbose=True):
        '''
        @config: GRCNNConfiger. Configer used to set the architecture of GRCNNEncoder.
        ''' 
        if verbose: logger.debug('Building Gated Recursive Convolutional Neural Network Encoder...')
        # Scale factor for initializing parameters
        self.scale = config.scale
        # Make theano symbolic tensor for input and model parameters
        self.input = T.matrix(name='GrCNN Encoder input', dtype=floatX)
        # Configure activation function
        self.act = Activation(config.activation)
        fan_in, fan_out = config.num_input, config.num_hidden
        # Initialize model parameters
        # Set seed of the random generator
        np.random.seed(config.random_seed)
        # Projection matrix U
        # (W_l and W_r below are initialized as orthogonal matrices via SVD)
        U_val = np.random.uniform(low=-1.0, high=1.0, size=(fan_in, fan_out))
        U_val = U_val.astype(floatX)
        U_val *= self.scale
        self.U = theano.shared(value=U_val, name='U', borrow=True)
        self.hidden0 = T.dot(self.input, self.U)

        # W^l, W^r, parameters used to construct the central hidden representation
        Wl_val = np.random.uniform(low=-1.0, high=1.0, size=(fan_out, fan_out))
        Wl_val = Wl_val.astype(floatX)
        Wl_val, _, _ = np.linalg.svd(Wl_val)
        # Wl_val *= self.scale
        self.Wl = theano.shared(value=Wl_val, name='W_l', borrow=True)

        Wr_val = np.random.uniform(low=-1.0, high=1.0, size=(fan_out, fan_out))
        Wr_val = Wr_val.astype(floatX)
        Wr_val, _, _ = np.linalg.svd(Wr_val)
        # Wr_val *= self.scale
        self.Wr = theano.shared(value=Wr_val, name='W_r', borrow=True)
        
        self.Wb = theano.shared(value=np.zeros(fan_out, dtype=floatX), name='Wb', borrow=True)
        
        # G^l, G^r, parameters used to construct the three-way coefficients
        Gl_val = np.random.uniform(low=-1.0, high=1.0, size=(fan_out, 3))
        Gl_val = Gl_val.astype(floatX)
        self.Gl = theano.shared(value=Gl_val, name='G_l', borrow=True)

        Gr_val = np.random.uniform(low=-1.0, high=1.0, size=(fan_out, 3))
        Gr_val = Gr_val.astype(floatX)
        self.Gr = theano.shared(value=Gr_val, name='G_r', borrow=True)

        self.Gb = theano.shared(value=np.zeros(3, dtype=floatX), name='Gb', borrow=True)
        # Save all the parameters into one batch
        self.params = [self.U, self.Wl, self.Wr, self.Wb, self.Gl, self.Gr, self.Gb]
        # Compute the total number of parameters
        self.num_params = reduce(lambda x, y: x+np.prod(y.get_value().shape), self.params, 0)
        # Length of the time sequence
        self.nsteps = self.input.shape[0]
        self.pyramids, _ = theano.scan(fn=self._step_prop, 
                                    sequences=T.arange(self.nsteps-1),
                                    non_sequences=self.nsteps,
                                    outputs_info=[self.hidden0],
                                    n_steps=self.nsteps-1)
        self.output = self.pyramids[-1][0].dimshuffle('x', 0)
        # Compression -- Encoding function
        self.compress = theano.function(inputs=[self.input], outputs=self.output)
        if verbose:
            logger.debug('Finished constructing the structure of grCNN Encoder: ')
            logger.debug('Size of the input dimension: %d' % fan_in)
            logger.debug('Size of the hidden dimension: %d' % fan_out)
            logger.debug('Activation function: %s' % config.activation)
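
The recurrent matrices above are made orthonormal by keeping the U factor from the SVD of a random matrix; an orthogonal matrix preserves vector norms, which helps repeated applications of W_l and W_r avoid exploding or vanishing activations. The trick in isolation:

import numpy as np

rng = np.random.RandomState(1234)
M = rng.uniform(low=-1.0, high=1.0, size=(8, 8))
Q, _, _ = np.linalg.svd(M)               # Q is orthonormal
print(np.allclose(Q @ Q.T, np.eye(8)))   # True
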
images_path = './data/training_images'
annotations_path = './data/annotations'
classes_file = './data/classes.txt'
X, y = prepare_dataset(images_path, annotations_path, classes_file)
'''TRAINING PROCEDURE'''
from models_final import Sequential
from convolutions_final import Conv2D
from normalizations import BatchNormalization
from poolings import MaxPool2D
from dense_final import Flatten, Dense
from activations import Activation

model = Sequential()
model.add(Conv2D(10, (3, 3), 1, "valid", "convLayer1", X.shape))
model.add(MaxPool2D((2, 2), 2, "valid", "poolLayer1"))
model.add(Activation('relu'))
model.add(BatchNormalization(1, 0, 1e-5))

model.add(Conv2D(10, (3, 3), 1, "valid", "convLayer2"))
model.add(MaxPool2D((2, 2), 2, "valid", "poolLayer2"))
model.add(Activation('relu'))
model.add(BatchNormalization(1, 0, 1e-5))

model.add(Conv2D(10, (3, 3), 1, "valid", "convLayer3"))
model.add(MaxPool2D((2, 2), 2, "valid", "poolLayer3"))
model.add(Activation('relu'))
model.add(BatchNormalization(1, 0, 1e-5))

model.add(Conv2D(10, (3, 3), 1, "valid", "convLayer4"))
model.add(MaxPool2D((2, 2), 2, "valid", "poolLayer4"))
model.add(Activation('relu'))