def __init__(self, in_dim, hidden_dim, initializer=default_initializer, normalize=True,
             dropout=0, reconstructe=True, activation="tanh", verbose=True):
    """Build the composition/reconstruction weights of a Recursive Auto-Encoder (RAE).

    :param in_dim: input dimension (must equal hidden_dim)
    :param hidden_dim: hidden-layer dimension
    :param initializer: random weight initializer
    :param normalize: whether to normalize representations
    :param dropout: dropout rate
    :param reconstructe: NOTE(review): accepted but never stored or read in this
        constructor — confirm whether it is consumed elsewhere
    :param activation: activation function name
    :param verbose: whether to emit debug log output
    :return:
    """
    self.in_dim = in_dim
    self.out_dim = hidden_dim
    self.hidden_dim = hidden_dim
    # Composition maps a concatenated child pair back to a single node of the
    # same size, so the input and hidden dimensions must agree.
    assert self.in_dim == self.hidden_dim
    self.initializer = initializer
    self.normalize = normalize
    self.dropout = dropout
    self.verbose = verbose
    self.act = Activation(activation)
    # Composition Function Weight
    # (dim, 2 * dim)
    self.W = shared_rand_matrix((self.hidden_dim, 2 * self.in_dim), 'W', initializer=initializer)
    # (dim, )
    self.b = shared_zero_matrix((self.hidden_dim, ), 'b')
    # Reconstruction Function Weight
    # (2 * dim, dim)
    self.Wr = shared_rand_matrix((2 * self.in_dim, self.hidden_dim), 'Wr', initializer=initializer)
    # (2 * dim, )
    self.br = shared_zero_matrix((self.in_dim * 2, ), 'br')
    self.params = [self.W, self.b, self.Wr, self.br]
    self.norm_params = [self.W, self.Wr]
    # L1/L2 regularization terms over the weight matrices (biases excluded).
    self.l1_norm = sum([T.sum(T.abs_(param)) for param in self.norm_params])
    self.l2_norm = sum([T.sum(param ** 2) for param in self.norm_params])
    if verbose:
        logger.debug('Architecture of RAE built finished, summarized as below: ')
        logger.debug('Hidden dimension: %d' % self.hidden_dim)
        logger.debug('Normalize: %s' % self.normalize)
        # Fixed: log the activation name like the other layers do, instead of
        # the Activation object's repr.
        logger.debug('Activation: %s' % self.act.method)
        logger.debug('Dropout Rate: %s' % self.dropout)
def __init__(self, entity_dim, relation_num, activation='tanh', hidden=5, keep_normal=False,
             initializer=default_initializer, prefix='', verbose=True):
    """Allocate Neural Tensor Network weights: one stack of bilinear slices per relation."""
    super(NeuralTensorModel, self).__init__()
    self.entity_dim = entity_dim
    self.relation_num = relation_num
    self.hidden = hidden
    self.slice_seq = T.arange(hidden)
    self.keep_normal = keep_normal
    # Bilinear tensor: (relation_num, entity_dim, entity_dim, hidden)
    self.W = shared_rand_matrix((relation_num, self.entity_dim, self.entity_dim, self.hidden),
                                prefix + 'NTN_W', initializer)
    # Slice-combination weights: (relation_num, hidden)
    self.U = shared_ones_matrix((relation_num, self.hidden), name=prefix + 'NTN_U')
    if keep_normal:
        # Standard-layer weights over the concatenated entity pair:
        # (relation_num, 2 * entity_dim, hidden)
        self.V = shared_rand_matrix((relation_num, self.entity_dim * 2, self.hidden),
                                    prefix + 'NTN_V', initializer)
        # Bias term: (relation_num, hidden)
        self.b = shared_zero_matrix((relation_num, self.hidden), name=prefix + 'NTN_B')
        self.params = [self.W, self.V, self.U, self.b]
        self.norm_params = [self.W, self.V, self.U, self.b]
    else:
        # Tensor-only variant: the bilinear weights are the sole parameters.
        self.params = [self.W]
        self.norm_params = [self.W]
    self.act = Activation(activation)
    # Accumulate the regularization terms over every registered weight.
    self.l1_norm = T.sum([T.sum(T.abs_(w)) for w in self.norm_params])
    self.l2_norm = T.sum([T.sum(w ** 2) for w in self.norm_params])
    if verbose:
        logger.debug('Architecture of Tensor Model built finished, summarized as below:')
        logger.debug('Entity Dimension: %d' % self.entity_dim)
        logger.debug('Hidden Dimension: %d' % self.hidden)
        logger.debug('Relation Number: %d' % self.relation_num)
        logger.debug('Initializer: %s' % initializer)
        logger.debug('Activation: %s' % activation)
def __init__(self, entity_dim, relation_num, activation='iden', initializer=default_initializer, prefix='', verbose=True):
    """Allocate TransE relation parameters: one translation vector per relation."""
    super(TransEModel, self).__init__()
    self.entity_dim = entity_dim
    self.relation_num = relation_num
    # Relation translation vectors: (relation_num, entity_dim)
    self.W = shared_rand_matrix((relation_num, self.entity_dim), prefix + 'TransE_R', initializer)
    self.act = Activation(activation)
    self.params = [self.W]
    self.norm_params = [self.W]
    # Regularization over the single relation matrix.
    self.l1_norm = T.sum(T.abs_(self.W))
    self.l2_norm = T.sum(self.W ** 2)
    if verbose:
        logger.debug('Architecture of TransE Model built finished, summarized as below:')
        logger.debug('Entity Dimension: %d' % self.entity_dim)
        logger.debug('Relation Number: %d' % self.relation_num)
        logger.debug('Initializer: %s' % initializer)
        logger.debug('Activation: %s' % activation)
def __init__(self, in_dim, activation, hidden_dim=None, transform_gate="sigmoid", prefix="",
             initializer=default_initializer, dropout=0, verbose=True):
    """Highway layer weights: transform path H plus gate T over a same-sized input/output."""
    # By construction input and output dimensions coincide, so W_H and W_T are square.
    if hidden_dim is not None:
        assert in_dim == hidden_dim
    if verbose:
        logger.debug('Building {}...'.format(self.__class__.__name__))
    super(HighwayLayer, self).__init__(in_dim, in_dim, activation, prefix, initializer, dropout, verbose)
    self.transform_gate = Activation(transform_gate)
    # Reuse the base layer's weights as the H (transform) path, renamed for clarity.
    self.W_H = self.W
    self.W_H.name = prefix + "W_H"
    self.b_H = self.b
    self.b_H.name = prefix + "b_H"
    # Gate parameters.
    self.W_T = shared_rand_matrix((self.hidden_dim, self.in_dim), prefix + 'W_T', initializer)
    self.b_T = shared_zero_matrix((self.hidden_dim,), prefix + 'b_T')
    self.params = [self.W_H, self.W_T, self.b_H, self.b_T]
    self.norm_params = [self.W_H, self.W_T]
    # Biases are excluded from the regularization terms.
    self.l1_norm = T.sum([T.sum(T.abs_(w)) for w in self.norm_params])
    self.l2_norm = T.sum([T.sum(w ** 2) for w in self.norm_params])
    if verbose:
        logger.debug('Architecture of {} built finished'.format(self.__class__.__name__))
        logger.debug('Input dimension: %d' % self.in_dim)
        logger.debug('Hidden dimension: %d' % self.hidden_dim)
        logger.debug('Activation Func: %s' % self.act.method)
        logger.debug('Transform Gate: %s' % self.transform_gate.method)
        logger.debug('Dropout Rate: %f' % self.dropout)
def __init__(self, in_dim, hidden_dim, activation, prefix="", initializer=default_initializer, dropout=0, verbose=True):
    """Feed-forward layer parameters: weight W of shape (hidden, in) and bias b of shape (hidden,)."""
    if verbose:
        logger.debug('Building {}...'.format(self.__class__.__name__))
    self.in_dim = in_dim
    self.hidden_dim = hidden_dim
    self.out_dim = hidden_dim
    self.act = Activation(activation)
    self.dropout = dropout
    # Weight matrix: (hidden, in)
    self.W = shared_rand_matrix((self.hidden_dim, self.in_dim), prefix + 'W', initializer)
    # Bias term: (hidden,)
    self.b = shared_zero_matrix((self.hidden_dim, ), prefix + 'b')
    self.params = [self.W, self.b]
    self.norm_params = [self.W]
    # Only the weight matrix is regularized; the bias is excluded.
    self.l1_norm = T.sum([T.sum(T.abs_(w)) for w in self.norm_params])
    self.l2_norm = T.sum([T.sum(w ** 2) for w in self.norm_params])
    if verbose:
        logger.debug('Architecture of {} built finished'.format(self.__class__.__name__))
        logger.debug('Input dimension: %d' % self.in_dim)
        logger.debug('Hidden dimension: %d' % self.hidden_dim)
        logger.debug('Activation Func: %s' % self.act.method)
        logger.debug('Dropout Rate: %f' % self.dropout)
def __init__(self, in_dim, hidden_dim, pooling, activation='tanh', prefix="",
             initializer=default_initializer, dropout=0, verbose=True):
    """Single-direction recurrent encoder: feed-forward W, recurrent U, bias b."""
    if verbose:
        logger.debug('Building {}...'.format(self.__class__.__name__))
    super(RecurrentEncoder, self).__init__(in_dim, hidden_dim, pooling, activation, dropout)
    self.in_dim = in_dim
    self.out_dim = hidden_dim
    self.hidden_dim = hidden_dim
    self.pooling = pooling
    self.dropout = dropout
    self.act = Activation(activation)
    # Input-to-hidden matrix: (hidden, in)
    self.W = shared_rand_matrix((self.hidden_dim, self.in_dim), prefix + 'W_forward', initializer)
    # Bias term: (hidden,)
    self.b = shared_zero_matrix((self.hidden_dim, ), prefix + 'b_forward')
    # Hidden-to-hidden (recurrent) matrix: (hidden, hidden)
    self.U = shared_rand_matrix((self.hidden_dim, self.hidden_dim), prefix + 'U_forward', initializer)
    self.params = [self.W, self.U, self.b]
    self.norm_params = [self.W, self.U]
    # L1/L2 regularization over the two weight matrices; the bias is excluded.
    self.l1_norm = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.U))
    self.l2_norm = T.sum(self.W ** 2) + T.sum(self.U ** 2)
    if verbose:
        logger.debug('Architecture of {} built finished'.format(self.__class__.__name__))
        logger.debug('Input dimension: %d' % self.in_dim)
        logger.debug('Hidden dimension: %d' % self.hidden_dim)
        logger.debug('Pooling methods: %s' % self.pooling)
        logger.debug('Activation Func: %s' % self.act.method)
        logger.debug('Dropout Rate: %f' % self.dropout)
def __init__(self, word_dim, seq_dim, initializer=default_initializer):
    """Attention scored by a single weight vector over the [word; sequence] concatenation.

    :param word_dim: dimension of the word representation
    :param seq_dim: dimension of the sequence representation
    :param initializer: random weight initializer
    """
    # Fixed: forward the caller's initializer to the superclass. It was
    # hard-coded to default_initializer, silently ignoring a custom initializer.
    super(ConcatWordBasedAttention, self).__init__(word_dim=word_dim, seq_dim=seq_dim,
                                                   initializer=initializer)
    # Scoring vector over the concatenated pair: (word_dim + seq_dim, )
    self.W = shared_rand_matrix((self.word_dim + self.seq_dim, ), 'Attention_W', initializer)
    self.params = [self.W]
    self.norm_params = [self.W]
def __init__(self, entity_dim, relation_num, hidden=50, activation='tanh',
             initializer=default_initializer, prefix='', verbose=True):
    """Single-layer relation model: two per-relation projections plus a scoring vector."""
    super(SingleLayerModel, self).__init__()
    self.hidden = hidden
    self.entity_dim = entity_dim
    self.relation_num = relation_num
    # Both entity projections share the same shape: (relation_num, hidden, entity_dim)
    proj_shape = (relation_num, self.hidden, self.entity_dim)
    self.W_1 = shared_rand_matrix(proj_shape, prefix + 'SingleLayer_W1', initializer)
    self.W_2 = shared_rand_matrix(proj_shape, prefix + 'SingleLayer_W2', initializer)
    # Per-relation scoring vector: (relation_num, hidden)
    self.u = shared_ones_matrix((relation_num, self.hidden, ), prefix + 'SingleLayer_u')
    self.act = Activation(activation)
    self.params = [self.W_1, self.W_2, self.u]
    self.norm_params = [self.W_1, self.W_2, self.u]
    # Regularization accumulated over all three parameter groups.
    self.l1_norm = T.sum(T.abs_(self.W_1)) + T.sum(T.abs_(self.W_2)) + T.sum(T.abs_(self.u))
    self.l2_norm = T.sum(self.W_1 ** 2) + T.sum(self.W_2 ** 2) + T.sum(self.u ** 2)
    if verbose:
        logger.debug('Architecture of Single Layer Model built finished, summarized as below:')
        logger.debug('Entity Dimension: %d' % self.entity_dim)
        logger.debug('Hidden Dimension: %d' % self.hidden)
        logger.debug('Relation Number: %d' % self.relation_num)
        logger.debug('Initializer: %s' % initializer)
        logger.debug('Activation: %s' % activation)
def __init__(self, in_dim, hidden_dim, pooling, activation='tanh',
             gates=("sigmoid", "sigmoid", "sigmoid"), prefix="",
             initializer=OrthogonalInitializer(), dropout=0, verbose=True):
    """LSTM sequence encoder; gate and candidate weights are stacked (4 * hidden rows).

    :param in_dim: input dimension
    :param hidden_dim: hidden-state dimension
    :param pooling: pooling method over the hidden-state sequence
    :param activation: candidate activation function name
    :param gates: activation names for the (input, forget, output) gates
    :param prefix: name prefix for the shared variables
    :param initializer: weight initializer (NOTE: the default instance is created
        once at definition time and shared across calls — assumed stateless)
    :param dropout: dropout rate
    :param verbose: whether to emit debug logs
    """
    if verbose:
        logger.debug('Building {}...'.format(self.__class__.__name__))
    super(LSTMEncoder, self).__init__(in_dim, hidden_dim, pooling, activation, dropout)
    self.in_gate, self.forget_gate, self.out_gate = Activation(gates[0]), Activation(gates[1]), Activation(gates[2])
    # W [in, forget, output, recurrent] (4 * hidden, in)
    self.W = shared_rand_matrix((self.hidden_dim * 4, self.in_dim), prefix + 'W', initializer)
    # U [in, forget, output, recurrent] (4 * hidden, hidden)
    self.U = shared_rand_matrix((self.hidden_dim * 4, self.hidden_dim), prefix + 'U', initializer)
    # b [in, forget, output, recurrent] (4 * hidden,)
    self.b = shared_zero_matrix((self.hidden_dim * 4, ), prefix + 'b')
    self.params = [self.W, self.U, self.b]
    # Fixed: define norm_params like every other layer in this file (bias excluded),
    # so generic regularization code can treat this encoder uniformly.
    self.norm_params = [self.W, self.U]
    self.l1_norm = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.U))
    self.l2_norm = T.sum(self.W ** 2) + T.sum(self.U ** 2)
    if verbose:
        logger.debug('Architecture of {} built finished'.format(self.__class__.__name__))
        logger.debug('Input dimension: %d' % self.in_dim)
        logger.debug('Hidden dimension: %d' % self.hidden_dim)
        logger.debug('Pooling methods: %s' % self.pooling)
        logger.debug('Activation Func: %s' % self.act.method)
        logger.debug('Input Gate: %s' % self.in_gate.method)
        logger.debug('Forget Gate: %s' % self.forget_gate.method)
        logger.debug('Output Gate: %s' % self.out_gate.method)
        # Fixed: removed a duplicated 'Activation Func' debug line.
        logger.debug('Dropout Rate: %f' % self.dropout)
def __init__(self, word_dim, seq_dim, hidden_dim, activation='tanh', initializer=default_initializer):
    """Additive (MLP-style) word-based attention parameters.

    Presumably scores a word against a sequence representation via
    v . act(W w + U s) — confirm against the scoring method.

    :param word_dim: dimension of word representations
    :param seq_dim: dimension of the sequence representation
    :param hidden_dim: attention hidden dimension
    :param activation: activation applied before the scoring vector
    :param initializer: random weight initializer
    """
    # Fixed: forward the caller's initializer to the superclass (was hard-coded
    # to default_initializer, silently ignoring a custom initializer).
    super(NNWordBasedAttention, self).__init__(word_dim=word_dim, seq_dim=seq_dim,
                                               initializer=initializer)
    self.hidden_dim = hidden_dim
    # Word projection: (word_dim, hidden_dim)
    self.W = shared_rand_matrix((self.word_dim, self.hidden_dim), 'Attention_W', initializer)
    # Sequence projection: (seq_dim, hidden_dim)
    self.U = shared_rand_matrix((self.seq_dim, self.hidden_dim), 'Attention_U', initializer)
    # Scoring vector: (hidden_dim, )
    self.v = shared_rand_matrix((self.hidden_dim, ), 'Attention_v', initializer)
    self.act = Activation(activation)
    # Fixed: U and v are trainable weights too — previously only W was registered,
    # so U and v would never receive gradient updates or regularization.
    self.params = [self.W, self.U, self.v]
    self.norm_params = [self.W, self.U, self.v]
def __init__(self, in_dim, hidden_dim, kernel_size=3, padding='same', pooling='max', dilation_rate=1.0,
             activation='relu', prefix="", initializer=GlorotUniformInitializer(), dropout=0.0, verbose=True):
    """Init Function for ConvolutionLayer.

    :param in_dim: input feature dimension
    :param hidden_dim: number of filters (output feature dimension)
    :param kernel_size: convolution window width
    :param padding: 'same', 'valid'
    :param pooling: 'max', 'mean', 'min'
    :param dilation_rate: dilation factor used when computing the padding size
    :param activation: activation function name
    :param prefix: shared-variable name prefix
    :param initializer: weight initializer
    :param dropout: dropout rate
    :param verbose: whether to emit debug logs
    """
    if verbose:
        logger.debug('Building {}...'.format(self.__class__.__name__))
    self.in_dim = in_dim
    self.out_dim = hidden_dim
    self.hidden_dim = hidden_dim
    self.kernel_size = kernel_size
    self.padding = padding
    self.dilation_rate = dilation_rate
    self.pooling = pooling
    self.dropout = dropout
    self.act = Activation(activation)
    # Zero-padding width implied by the dilation rate and window size.
    self.padding_size = int(self.dilation_rate * (self.kernel_size - 1))
    # Convolution kernel: (kernel_size, hidden, in)
    self.W = shared_rand_matrix((self.kernel_size, self.hidden_dim, self.in_dim), prefix + 'W', initializer)
    # Bias term: (hidden,)
    self.b = shared_zero_matrix((self.hidden_dim,), prefix + 'b')
    self.params = [self.W, self.b]
    self.norm_params = [self.W]
    # Regularization over the kernel only; the bias is excluded.
    self.l1_norm = T.sum(T.abs_(self.W))
    self.l2_norm = T.sum(self.W ** 2)
    if verbose:
        logger.debug('Architecture of {} built finished'.format(self.__class__.__name__))
        logger.debug('Input dimension: %d' % self.in_dim)
        logger.debug('Filter Num (Hidden): %d' % self.hidden_dim)
        logger.debug('Kernel Size (Windows): %d' % self.kernel_size)
        logger.debug('Padding method : %s' % self.padding)
        logger.debug('Dilation Rate : %s' % self.dilation_rate)
        logger.debug('Padding Size : %s' % self.padding_size)
        logger.debug('Pooling method : %s' % self.pooling)
        logger.debug('Activation Func: %s' % self.act.method)
        logger.debug('Dropout Rate: %f' % self.dropout)
def __init__(self, w=None, size=10000, dim=50, initializer=default_initializer, prefix=""):
    """Embedding lookup table, either randomly initialized or wrapping a given matrix.

    :param w: optional pre-trained matrix; when given, its shape fixes size and dim
    :param size: vocabulary size used when w is None
    :param dim: embedding dimension used when w is None
    :param initializer: random initializer used when w is None
    :param prefix: name prefix for the shared variable
    """
    if w is None:
        # No pre-trained weights: build a random (size, dim) table.
        self.size = size
        self.dim = dim
        self.W = shared_rand_matrix(shape=(self.size, self.dim), initializer=initializer,
                                    name=prefix + 'Embedding')
        logger.info("Initialize %d Word with %s" % (self.size, initializer))
    else:
        # Wrap the provided matrix as a shared variable.
        self.size = w.shape[0]
        self.dim = w.shape[1]
        self.W = shared_matrix(np.array(w, dtype=theano.config.floatX), name=prefix + 'Embedding')
    self.params = [self.W]
    # Row 0 is excluded from the norms — presumably a padding index; confirm with callers.
    self.norm_params = [self.W[1:]]
    self.l1_norm = T.sum(T.abs_(self.W[1:]))
    self.l2_norm = T.sum(self.W[1:] ** 2)
def __init__(self, num_in, num_out, initializer=default_initializer, dropout=0, verbose=True):
    """Softmax classifier parameters: projection W (num_in, num_out) and bias b (num_out,)."""
    self.num_in = num_in
    self.num_out = num_out
    self.dropout = dropout
    # Projection matrix and bias of the output layer.
    self.W = shared_rand_matrix(shape=(num_in, num_out), name="softmax_W", initializer=initializer)
    self.b = shared_zero_matrix((num_out, ), 'softmax_b')
    self.params = [self.W, self.b]
    # Only the projection matrix is regularized; the bias is excluded.
    self.l1_norm = T.sum(T.abs_(self.W))
    self.l2_norm = T.sum(self.W ** 2)
    if verbose:
        logger.debug('Architecture of {} built finished'.format(self.__class__.__name__))
        logger.debug('Input dimension : %d' % self.num_in)
        logger.debug('Output Label Num: %d' % self.num_out)
        logger.debug('Dropout Rate : %f' % self.dropout)
def __init__(self, in_dim, hidden_dim, pooling, activation='tanh',
             gates=("sigmoid", "sigmoid", "sigmoid"), prefix="",
             initializer=default_initializer, bidirection_shared=False, dropout=0, verbose=True):
    """Bidirectional LSTM encoder; the two directions may share parameters.

    :param in_dim: input dimension
    :param hidden_dim: hidden dimension per direction (output is 2 * hidden_dim)
    :param pooling: pooling method over the hidden states
    :param activation: candidate activation function name
    :param gates: activation names for the (input, forget, output) gates
    :param prefix: shared-variable name prefix
    :param initializer: weight initializer
    :param bidirection_shared: if True, forward and backward directions share W/U/b
    :param dropout: dropout rate
    :param verbose: whether to emit debug logs
    """
    if verbose:
        logger.debug('Building {}...'.format(self.__class__.__name__))
    super(BiLSTMEncoder, self).__init__(in_dim, hidden_dim, pooling, activation, gates, prefix,
                                        initializer, dropout, verbose)
    # Forward and backward hidden states are concatenated.
    self.out_dim = hidden_dim * 2
    # Composition Function Weight -- Gates
    if bidirection_shared:
        # W [in, forget, output, recurrent]
        self.W_forward, self.W_forward.name = self.W, prefix + "W_shared"
        self.W_backward = self.W_forward
        # U [in, forget, output, recurrent]
        self.U_forward, self.U_forward.name = self.U, prefix + "U_shared"
        self.U_backward = self.U_forward
        # b [in, forget, output, recurrent]
        self.b_forward, self.b_forward.name = self.b, prefix + "b_shared"
        self.b_backward = self.b_forward
        self.params = [self.W_forward, self.U_forward, self.b_forward]
        self.norm_params = [self.W_forward, self.U_forward]
    else:
        # W [in, forget, output, recurrent]
        self.W_forward, self.W_forward.name = self.W, prefix + "W_forward"
        self.W_backward = shared_rand_matrix((self.hidden_dim * 4, self.in_dim),
                                             prefix + 'W_backward', initializer)
        # U [in, forget, output, recurrent]
        self.U_forward, self.U_forward.name = self.U, prefix + "U_forward"
        self.U_backward = shared_rand_matrix((self.hidden_dim * 4, self.hidden_dim),
                                             prefix + 'U_backward', initializer)
        # b [in, forget, output, recurrent]
        self.b_forward, self.b_forward.name = self.b, prefix + "b_forward"
        self.b_backward = shared_zero_matrix((self.hidden_dim * 4, ), prefix + 'b_backward')
        self.params = [self.W_forward, self.U_forward, self.b_forward,
                       self.W_backward, self.U_backward, self.b_backward]
        self.norm_params = [self.W_forward, self.U_forward, self.W_backward, self.U_backward]
    # Biases are excluded from the regularization terms.
    self.l1_norm = T.sum([T.sum(T.abs_(param)) for param in self.norm_params])
    self.l2_norm = T.sum([T.sum(param ** 2) for param in self.norm_params])
    if verbose:
        logger.debug('Architecture of {} built finished'.format(self.__class__.__name__))
        if bidirection_shared:
            logger.debug('%s' % "Forward/Backward Shared Parameter")
        logger.debug('Input dimension: %d' % self.in_dim)
        logger.debug('Hidden dimension: %d' % self.hidden_dim)
        logger.debug('Pooling methods: %s' % self.pooling)
        logger.debug('Activation Func: %s' % self.act.method)
        logger.debug('Input Gate: %s' % self.in_gate.method)
        logger.debug('Forget Gate: %s' % self.forget_gate.method)
        logger.debug('Output Gate: %s' % self.out_gate.method)
        # Fixed: removed a duplicated 'Activation Func' debug line.
        logger.debug('Dropout Rate: %f' % self.dropout)
def __init__(self, in_dim, hidden_dim, pooling, activation='tanh', prefix="",
             initializer=default_initializer, dropout=0, bidirection_shared=False, verbose=True):
    """Bidirectional plain-RNN encoder; forward/backward parameters may be shared."""
    super(BiRecurrentEncoder, self).__init__(in_dim, hidden_dim, pooling, activation,
                                             prefix, initializer, dropout, verbose)
    if verbose:
        logger.debug('Building {}...'.format(self.__class__.__name__))
    # Outputs of the two directions are concatenated.
    self.out_dim = hidden_dim * 2
    if bidirection_shared:
        # A single parameter set serves both directions.
        self.W_forward = self.W
        self.W_forward.name = prefix + "W_shared"
        self.W_backward = self.W_forward
        self.b_forward = self.b
        self.b_forward.name = prefix + "b_shared"
        self.b_backward = self.b_forward
        self.U_forward = self.U
        self.U_forward.name = prefix + "U_shared"
        self.U_backward = self.U_forward
        self.params = [self.W_forward, self.U_forward, self.b_forward]
        self.norm_params = [self.W_forward, self.U_forward]
    else:
        # Independent parameters for the backward direction.
        self.W_forward = self.W
        self.W_forward.name = prefix + "W_forward"
        self.W_backward = shared_rand_matrix((self.hidden_dim, self.in_dim),
                                             prefix + 'W_backward', initializer)
        self.b_forward = self.b
        self.b_forward.name = prefix + "b_forward"
        self.b_backward = shared_zero_matrix((self.hidden_dim, ), prefix + 'b_backward')
        self.U_forward = self.U
        self.U_forward.name = prefix + "U_forward"
        self.U_backward = shared_rand_matrix((self.hidden_dim, self.hidden_dim),
                                             prefix + 'U_backward', initializer)
        self.params = [self.W_forward, self.W_backward, self.U_forward, self.U_backward,
                       self.b_forward, self.b_backward]
        self.norm_params = [self.W_forward, self.W_backward, self.U_forward, self.U_backward]
    # Biases are excluded from the regularization terms.
    self.l1_norm = T.sum([T.sum(T.abs_(w)) for w in self.norm_params])
    self.l2_norm = T.sum([T.sum(w ** 2) for w in self.norm_params])
    if verbose:
        logger.debug('Architecture of {} built finished'.format(self.__class__.__name__))
        logger.debug('Input dimension: %d' % self.in_dim)
        logger.debug('Hidden dimension: %d' % self.hidden_dim)
        logger.debug('Pooling methods: %s' % self.pooling)
        logger.debug('Activation Func: %s' % self.act.method)
        logger.debug('Dropout Rate: %f' % self.dropout)