def create_model(args, maxlen, vocab):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = weight_matrix / K.cast(K.epsilon() + K.sqrt(K.sum(K.square(weight_matrix), axis=-1, keepdims=True)), K.floatx())
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye((w_n.shape[0]).eval())))
        return args.ortho_reg*reg

    # Vocabulary size
    vocab_size = len(vocab)

    ##### Inputs #####
    # Shape of positive examples: batch_size * maxlen; each element is an index into the vocabulary, one index per word in the sentence
    # Shape of negative examples: batch_size * args.neg_size * maxlen, ditto
    # These provide the word indices w
    sentence_input = Input(batch_shape=(None, maxlen), dtype='int32', name='sentence_input')
    neg_input = Input(batch_shape=(None, args.neg_size, maxlen), dtype='int32', name='neg_input')

    ##### Construct word embedding layer #####
    # The embedding layer maps positive integers (indices) to dense vectors of fixed size, e.g. [[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]
    # keras.layers.embeddings.Embedding(input_dim, output_dim, embeddings_initializer='uniform', embeddings_regularizer=None, activity_regularizer=None, embeddings_constraint=None, mask_zero=False, input_length=None)
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    ##### Compute sentence representation #####
    # Compute the sentence embedding; this involves many Keras details worth studying further
    e_w = word_emb(sentence_input)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights')([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    ##### Compute representations of negative instances #####
    # Compute z_n for the negative instances
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    ##### Reconstruction #####
    # Reconstruction step
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, args.emb_dim, name='aspect_emb',
            W_regularizer=ortho_reg)(p_t)

    ##### Loss #####
    # Loss function
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(input=[sentence_input, neg_input], output=loss)

    ### Word embedding and aspect embedding initialization ######
    # If emb_path is given, initialize the word embedding matrix E from the file and the aspect matrix T with k-means centroids
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        logger.info('Initializing word embedding matrix')
        model.get_layer('word_emb').set_weights(emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').get_weights()))
        logger.info('Initializing aspect embedding matrix as centroid of kmean clusters')
        model.get_layer('aspect_emb').W.set_value(emb_reader.get_aspect_matrix(args.aspect_size))

    return model
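
# The ortho_reg term above row-normalizes the aspect embedding matrix T and penalizes
# how far T_n . T_n^T deviates from the identity, pushing the aspect vectors toward
# orthogonality. A minimal NumPy sketch of the same quantity; the name
# `ortho_penalty` and the coefficient value are illustrative, not from this code:
import numpy as np

def ortho_penalty(T, ortho_reg_coef=0.1):
    # Row-normalize T, then penalize off-diagonal correlations in the Gram matrix.
    T_n = T / (1e-7 + np.linalg.norm(T, axis=-1, keepdims=True))
    gram = T_n @ T_n.T
    return ortho_reg_coef * np.square(gram - np.eye(T.shape[0])).sum()

print(ortho_penalty(np.eye(3)))        # ~0: orthonormal rows incur no penalty
print(ortho_penalty(np.ones((3, 5))))  # ~0.6: identical rows are heavily penalized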
Example #2
def create_model(args, maxlen, vocab):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = weight_matrix / K.cast(K.epsilon() + K.sqrt(K.sum(K.square(weight_matrix), axis=-1, keepdims=True)), K.floatx())
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye(w_n.shape[0].value)))
        return args.ortho_reg*reg

    vocab_size = len(vocab)

    ##### Inputs #####
    sentence_input = Input(shape=(maxlen,), dtype='int32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen), dtype='int32', name='neg_input')

    ##### Construct word embedding layer #####
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    ##### Compute sentence representation #####
    e_w = word_emb(sentence_input)
    #y_s = LSTM(args.emb_dim, return_sequences=False)(e_w)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights')([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    ##### Compute representations of negative instances #####
    e_neg = word_emb(neg_input)
    #z_n = TimeDistributed(LSTM(args.emb_dim, return_sequences=False))(e_neg)
    z_n = Average()(e_neg)

    ##### Reconstruction #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, args.emb_dim, name='aspect_emb',
            W_regularizer=ortho_reg)(p_t)

    ##### Loss #####
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(inputs=[sentence_input, neg_input], outputs=loss)

    ### Word embedding and aspect embedding initialization ######
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        logger.info('Initializing word embedding matrix')
        # model.get_layer('word_emb').set_weights(emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').get_weights()[0]))
        K.set_value(
            model.get_layer('word_emb').embeddings,
            emb_reader.get_emb_matrix_given_vocab(vocab, K.get_value(model.get_layer('word_emb').embeddings)))
        logger.info('Initializing aspect embedding matrix as centroid of kmean clusters')
        K.set_value(
            model.get_layer('aspect_emb').W,
            emb_reader.get_aspect_matrix(args.aspect_size))
    return model
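
# Because the MaxMargin layer emits the hinge loss itself as the model output, such a
# model is typically compiled against a pass-through loss that just averages y_pred.
# A minimal training sketch, assuming hypothetical arrays `sentence_batch` of shape
# (batch, maxlen) and `neg_batch` of shape (batch, neg_size, maxlen):
import numpy as np
from keras import backend as K

def max_margin_loss(y_true, y_pred):
    # y_pred already holds the per-sample loss produced by the MaxMargin layer.
    return K.mean(y_pred)

# model.compile(optimizer='adam', loss=max_margin_loss)
# dummy_y = np.zeros((len(sentence_batch), 1))  # ignored by the pass-through loss
# model.train_on_batch([sentence_batch, neg_batch], dummy_y)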
Example #3
###############################################################################################################################
## Building model

sentence_input = Input(shape=(args.kstep, node_size),
                       dtype='float32',
                       name='sentence_inputt')
neg_input = Input(shape=(args.neg_size, args.kstep, node_size),
                  dtype='float32',
                  name='neg_inputt')
predict = Input(shape=(17, ), dtype='int32', name='predictt')

e_w = sentence_input
y_s = Average()(sentence_input)

att_weights = Attention(name='att_weights')([e_w, y_s])
z_s = WeightedSum()([e_w, att_weights])

##### Compute representations of negative instances #####
e_neg = neg_input
z_n = Average()(e_neg)

##### Reconstruction2 #####
dense1 = Dense(512, activation='relu')(z_s)
#dense1 = noise.GaussianNoise(0.09)(dense1)
p_t = Dense(128, activation='relu')(dense1)
dense2 = Dense(512, activation='relu')(p_t)
new_p_t = Activation('softmax', name='p_t')(dense2)
r_s = Dense(node_size, init='uniform')(new_p_t)

##### Loss1 #####
Example #4
def create_model(args, maxlen, vocab):
    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = K.l2_normalize(weight_matrix, axis=-1)
        reg = K.sum(
            K.square(
                K.dot(w_n, K.transpose(w_n)) -
                K.eye(w_n.get_shape().as_list()[0])))
        return args.ortho_reg * reg

    vocab_size = len(vocab)

    if args.emb_name:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(
            os.path.join("..", "preprocessed_data", args.domain),
            args.emb_name)
        aspect_matrix = emb_reader.get_aspect_matrix(args.aspect_size)
        args.aspect_size = emb_reader.aspect_size
        args.emb_dim = emb_reader.emb_dim

    ##### Inputs #####
    sentence_input = Input(shape=(maxlen, ),
                           dtype='int32',
                           name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen),
                      dtype='int32',
                      name='neg_input')

    ##### Construct word embedding layer #####
    word_emb = Embedding(vocab_size,
                         args.emb_dim,
                         mask_zero=True,
                         name='word_emb')
    #, embeddings_constraint=MaxNorm(10))

    ##### Compute sentence representation #####
    e_w = word_emb(sentence_input)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights')([e_w, y_s])
    #W_constraint=MaxNorm(10),
    #b_constraint=MaxNorm(10))([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    ##### Compute representations of negative instances #####
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    ##### Reconstruction #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(
        args.aspect_size,
        args.emb_dim,
        name='aspect_emb',
        #W_constraint=MaxNorm(10),
        W_regularizer=ortho_reg)(p_t)

    ##### Loss #####
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(inputs=[sentence_input, neg_input], outputs=[loss])

    ### Word embedding and aspect embedding initialization ######
    if args.emb_name:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing word embedding matrix')
        embs = model.get_layer('word_emb').embeddings
        K.set_value(
            embs,
            emb_reader.get_emb_matrix_given_vocab(vocab, K.get_value(embs)))
        logger.info(
            'Initializing aspect embedding matrix as centroid of kmean clusters'
        )
        K.set_value(model.get_layer('aspect_emb').W, aspect_matrix)

    return model
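
# get_aspect_matrix presumably returns the centroids of k-means clusters over the word
# embeddings (per the log message above). A rough sketch of such an initializer using
# scikit-learn; `kmeans_aspect_matrix` is an illustrative name, not the real
# W2VEmbReader API:
import numpy as np
from sklearn.cluster import KMeans

def kmeans_aspect_matrix(word_vectors, n_aspects, seed=0):
    # Cluster the L2-normalized word vectors and use the normalized centroids
    # as the initial aspect embedding matrix.
    normed = word_vectors / np.maximum(
        np.linalg.norm(word_vectors, axis=-1, keepdims=True), 1e-8)
    centroids = KMeans(n_clusters=n_aspects, random_state=seed).fit(normed).cluster_centers_
    return centroids / np.linalg.norm(centroids, axis=-1, keepdims=True)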
Example #5
def create_model(args, kstep, node_size):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = weight_matrix / K.cast(K.epsilon() + K.sqrt(K.sum(K.square(weight_matrix), axis=-1, keepdims=True)), K.floatx())
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye(w_n.shape[0].eval())))
        return args.ortho_reg*reg


    ##### Inputs #####
    sentence_input = Input(shape=(kstep, node_size), dtype='float32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, kstep, node_size), dtype='float32', name='neg_input')

    print("sentence_input.ndim", sentence_input.ndim)
    print("neg_input.ndim", neg_input.ndim)

    e_w = sentence_input
    y_s = Average()(sentence_input)

    print(y_s.ndim)
    print(e_w.ndim)
    print(K.int_shape(e_w), K.int_shape(y_s))

    att_weights = Attention(name='att_weights')([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    print("z_s----------- %d", (z_s.ndim))

    ##### Compute representations of negative instances #####
    #e_neg = word_emb(neg_input)
    e_neg = neg_input
    z_n = Average()(e_neg)

    print("e_neg.ndim")
    print(e_neg.ndim)
    print("z_n.ndim")
    print(z_n.ndim)

    ##### Reconstruction #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, 2405, name='aspect_emb',
            W_regularizer=ortho_reg)(p_t)

    ##### Loss #####

    print("z_s / r_s shapes:", K.int_shape(z_s), K.int_shape(r_s))

    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(input=[sentence_input, neg_input], output=loss)

    ### Word embedding and aspect embedding initialization ######
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        logger.info('Initializing word embedding matrix')
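        # NOTE: this variant has no 'word_emb' layer (the inputs are dense features) and
        # `vocab` is not a parameter of this create_model, so the two calls below would
        # fail as written; they appear to be leftovers from the embedding-based variants.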
        model.get_layer('word_emb').W.set_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').W.get_value()))
        logger.info('Initializing aspect embedding matrix as centroid of kmean clusters')
        model.get_layer('aspect_emb').W.set_value(emb_reader.get_aspect_matrix(args.aspect_size))

    return model
Example #6
def create_model(args, maxlen, vocab):
    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = K.l2_normalize(weight_matrix, axis=-1)  # K is the Keras backend
        reg = K.sum(
            K.square(
                K.dot(w_n, K.transpose(w_n)) -
                K.eye(w_n.shape[0].value)))  # deviation of the row Gram matrix from the identity; smaller means the aspect rows are closer to orthogonal
        return args.ortho_reg * reg  # smaller is better: it keeps the aspect vectors well separated

    vocab_size = len(vocab)

    if args.emb_name:  # load previously saved embeddings
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(
            os.path.join(
                "/content/drive/My Drive/Attention-Based-Aspect-Extraction-master",
                "preprocessed_data", args.domain), args.emb_name)
        aspect_matrix = emb_reader.get_aspect_matrix(args.aspect_size)
        args.aspect_size = emb_reader.aspect_size
        args.emb_dim = emb_reader.emb_dim

    ##### Inputs #####
    sentence_input = Input(shape=(maxlen, ),
                           dtype='int32',
                           name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen),
                      dtype='int32',
                      name='neg_input')

    ##### Construct word embedding layer #####
    word_emb = Embedding(vocab_size,
                         args.emb_dim,
                         mask_zero=True,
                         name='word_emb',
                         embeddings_constraint=MaxNorm(10))

    ##### Compute sentence representation ##### pre-processing: combine the sentence according to attention
    e_w = word_emb(sentence_input)  # map input indices to embeddings
    y_s = Average()(e_w)  # averaging layer
    att_weights = Attention(name='att_weights',
                            W_constraint=MaxNorm(10),
                            b_constraint=MaxNorm(10))([e_w,
                                                       y_s])  # attention layer
    z_s = WeightedSum()([e_w, att_weights])  # encoding layer

    ##### Compute representations of negative instances ##### tricks that improve accuracy
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    ##### Reconstruction ##### dense layers intended to decode the attended sentence features
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)  # softmax over the aspect weights; dimensionality unchanged
    r_s = WeightedAspectEmb(args.aspect_size,
                            args.emb_dim,
                            name='aspect_emb',
                            W_constraint=MaxNorm(10),
                            W_regularizer=ortho_reg)(p_t)  # max-norm constraint of 10 and the custom ortho_reg regularizer

    ##### Loss #####
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])  # custom max-margin (hinge) loss layer
    model = Model(inputs=[sentence_input, neg_input],
                  outputs=[loss])  # negative instances are provided as a separate input

    ### Word embedding and aspect embedding initialization ######
    if args.emb_name:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing word embedding matrix')
        embs = model.get_layer('word_emb').embeddings
        K.set_value(
            embs,
            emb_reader.get_emb_matrix_given_vocab(vocab, K.get_value(embs)))
        logger.info(
            'Initializing aspect embedding matrix as centroid of kmean clusters'
        )  # initialization uses the k-means centroids of the word embeddings
        K.set_value(model.get_layer('aspect_emb').W, aspect_matrix)  # aspect matrix used to build r_s

    return model
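
# neg_input holds, for every training sentence, neg_size other sentences that serve as
# negative instances for the max-margin loss. A simple way to build such a batch;
# `sample_negative_batch` is an illustrative helper, not part of this code base:
import numpy as np

def sample_negative_batch(padded_sentences, batch_size, neg_size, rng=None):
    # padded_sentences: (num_sentences, maxlen) integer matrix of word indices.
    rng = rng or np.random.default_rng()
    idx = rng.integers(0, len(padded_sentences), size=(batch_size, neg_size))
    return padded_sentences[idx]  # shape (batch_size, neg_size, maxlen)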
Example #7
def create_model(args, maxlen, vocab):
    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = K.l2_normalize(weight_matrix, axis=-1)
        reg = K.sum(
            K.square(K.dot(w_n, K.transpose(w_n)) - K.eye(w_n.shape[0].value)))
        return args.ortho_reg * reg

    vocab_size = len(vocab)

    if args.emb_name:
        if args.emb_technique == "w2v":
            logger.info("Load {} glove embedding for {}".format(
                args.lang, config.word_emb_training_type))
            if args.lang == 'en':
                emb_reader = EmbReader(
                    config.emb_dir_en["w2v"].format(
                        config.word_emb_training_type), args.emb_name)
            elif args.lang == 'de':
                #emb_reader = EmbReader(config.emb_dir_de["w2v"].format(config.word_emb_training_type),
                #args.emb_name)
                emb_reader = FineTuneEmbed_cca(
                    '../preprocessed_data/german/w2v/fine_tuned', 'w2v_emb',
                    '../preprocessed_data/german/w2v/full_trained',
                    'w2v_embedding_300')
        elif args.emb_technique == 'fasttext':
            if args.lang == 'en':
                #emb_reader = FastTextEmbReader(config.emb_dir_en["fasttext"].format(config.word_emb_training_type),
                #args.emb_name, config.fine_tuned_enabled)
                emb_reader = FineTuneEmbed_ortho_procrustes(
                    '../preprocessed_data/fasttext/fine_tuned',
                    'fasttext_pre_trained',
                    '../preprocessed_data/fasttext/full_trained',
                    'w2v_embedding_skipgram_300')
            elif args.lang == 'de':
                emb_reader = FastTextEmbReader(
                    config.emb_dir_de["fasttext"].format(
                        config.word_emb_training_type), args.emb_name,
                    config.fine_tuned_enabled)
            #emb_reader = FineTuneEmbed_ortho_procrustes('../preprocessed_data/fasttext/fine_tuned','fasttext_pre_trained','../preprocessed_data/fasttext/full_trained', 'w2v_embedding_skipgram_300')
        elif args.emb_technique == "glove":
            if args.lang == 'de':
                logger.info('Load german glove embedding')
                emb_reader = GloveEmbedding(config.emb_dir_de["glove"],
                                            args.emb_name)
            else:
                logger.info("Load en glove embedding for {}".format(
                    config.word_emb_training_type))
                emb_reader = GloveEmbedding(
                    config.emb_dir_en["glove"].format(
                        config.word_emb_training_type), args.emb_name)
        elif args.emb_technique == "MUSE_supervised":
            emb_reader = MUSEEmbedding(config.emb_dir_biling['supervised'],
                                       args.emb_name)
        elif args.emb_technique == "MUSE_unsupervised":
            emb_reader = MUSEEmbedding(config.emb_dir_biling['unsupervised'],
                                       args.emb_name)
        aspect_matrix = emb_reader.get_aspect_matrix(args.aspect_size)
        args.aspect_size = emb_reader.aspect_size
        args.emb_dim = emb_reader.emb_dim

    ##### Inputs #####
    sentence_input = Input(shape=(maxlen, ),
                           dtype='int32',
                           name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen),
                      dtype='int32',
                      name='neg_input')

    ##### Construct word embedding layer #####
    word_emb = Embedding(vocab_size,
                         args.emb_dim,
                         mask_zero=True,
                         name='word_emb',
                         embeddings_constraint=MaxNorm(10))

    ##### Compute sentence representation #####
    e_w = word_emb(sentence_input)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights',
                            W_constraint=MaxNorm(10),
                            b_constraint=MaxNorm(10))([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    ##### Compute representations of negative instances #####
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    ##### Reconstruction #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size,
                            args.emb_dim,
                            name='aspect_emb',
                            W_constraint=MaxNorm(10),
                            W_regularizer=ortho_reg)(p_t)

    ##### Loss #####
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(inputs=[sentence_input, neg_input], outputs=[loss])

    ### Word embedding and aspect embedding initialization ######
    if args.emb_name:
        logger.info('Initializing word embedding matrix')
        embs = model.get_layer('word_emb').embeddings
        K.set_value(
            embs,
            emb_reader.get_emb_matrix_given_vocab(vocab, K.get_value(embs)))
        logger.info(
            'Initializing aspect embedding matrix as centroid of kmean clusters'
        )
        K.set_value(model.get_layer('aspect_emb').W, aspect_matrix)

    return model
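
# The custom Attention layer above produces one weight per word from the word
# embeddings e_w and the averaged sentence vector y_s. One common formulation scores
# each word against the sentence average through a learned matrix M; this is only a
# NumPy sketch, the actual Attention layer is defined elsewhere in the repository:
import numpy as np

def content_attention(e_w, y_s, M):
    # e_w: (maxlen, emb_dim), y_s: (emb_dim,), M: (emb_dim, emb_dim)
    scores = e_w @ M @ y_s          # one relevance score per word
    scores = scores - scores.max()  # numerical stability for the softmax
    weights = np.exp(scores)
    return weights / weights.sum()  # attention weights summing to 1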
Example #8
def create_model(args, vocab, num_outputs, overal_maxlen, maxlen_aspect):
    
    ###############################################################################################################################
    ## Recurrence unit type
    #

    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #

    dropout = args.dropout_W       
    recurrent_dropout = args.dropout_U  
    vocab_size = len(vocab)

    logger.info('Building an LSTM attention model to predict term/aspect sentiment')
    print '\n\n'

    ##### Inputs #####
    sentence_input = Input(shape=(overal_maxlen,), dtype='int32', name='sentence_input')
    aspect_input = Input(shape=(maxlen_aspect,), dtype='int32', name='aspect_input')
    pretrain_input = Input(shape=(None,), dtype='int32', name='pretrain_input')

    ##### construct word embedding layer #####
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    ### represent aspect as averaged word embedding ###
    print 'use average term embs as aspect embedding'
    aspect_term_embs = word_emb(aspect_input)
    aspect_embs = Average(mask_zero=True, name='aspect_emb')(aspect_term_embs)

    ### sentence representation ###
    sentence_output = word_emb(sentence_input)
    pretrain_output = word_emb(pretrain_input)


    print 'use a rnn layer'
    rnn = RNN(args.rnn_dim, return_sequences=True, dropout=dropout, recurrent_dropout=recurrent_dropout, name='lstm')
    sentence_output = rnn(sentence_output)
    pretrain_output = rnn(pretrain_output)

    print 'use content attention to get term weights'
    att_weights = Attention(name='att_weights')([sentence_output, aspect_embs])
    sentence_output = WeightedSum()([sentence_output, att_weights])

    pretrain_output = Average(mask_zero=True)(pretrain_output)
  
    if args.dropout_prob > 0:
        print 'use dropout layer'
        sentence_output = Dropout(args.dropout_prob)(sentence_output)
        pretrain_output = Dropout(args.dropout_prob)(pretrain_output)


    sentence_output = Dense(num_outputs, name='dense_1')(sentence_output)
    pretrain_output = Dense(num_outputs, name='dense_2')(pretrain_output)

    aspect_probs = Activation('softmax', name='aspect_model')(sentence_output)
    doc_probs = Activation('softmax', name='pretrain_model')(pretrain_output)

    model = Model(inputs=[sentence_input, aspect_input, pretrain_input], outputs=[aspect_probs, doc_probs])


    logger.info('  Done')

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #

    if args.is_pretrain:

        import pickle

        print 'Set embedding, lstm, and dense weights from pre-trained models'
        if args.domain == 'lt':
            f_1 = open('../pretrained_weights/lstm_weights_lt%.1f.pkl'%(args.percetage), 'rb')
            f_2 = open('../pretrained_weights/dense_weights_lt%.1f.pkl'%(args.percetage), 'rb')
        else:
            f_1 = open('../pretrained_weights/lstm_weights_res%.1f.pkl'%(args.percetage), 'rb')
            f_2 = open('../pretrained_weights/dense_weights_res%.1f.pkl'%(args.percetage), 'rb')

        lstm_weights = pickle.load(f_1)
        dense_weights = pickle.load(f_2)
      
        model.get_layer('lstm').set_weights(lstm_weights)
        model.get_layer('dense_1').set_weights(dense_weights)
        model.get_layer('dense_2').set_weights(dense_weights)


    from w2vEmbReader import W2VEmbReader as EmbReader
    logger.info('Initializing lookup table')
    emb_path = '../glove/%s.txt'%(args.domain)
    emb_reader = EmbReader(args, emb_path)
    model.get_layer('word_emb').set_weights(emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').get_weights()))
    logger.info('  Done')

    return model
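
# With two softmax outputs named 'aspect_model' and 'pretrain_model', one plausible
# compile step assigns a loss per output by layer name. A sketch only; the optimizer
# and the loss weights are illustrative, not taken from the original training script:
def compile_aspect_sentiment_model(model, doc_loss_weight=0.1):
    model.compile(optimizer='adam',
                  loss={'aspect_model': 'categorical_crossentropy',
                        'pretrain_model': 'categorical_crossentropy'},
                  loss_weights={'aspect_model': 1.0,
                                'pretrain_model': doc_loss_weight})
    return model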
Example #9
def create_model(args, vocab, nb_class, overall_maxlen, doc_maxlen_1,
                 doc_maxlen_2):

    # Function that initializes word embeddings
    def init_emb(emb_matrix, vocab, emb_file_gen, emb_file_domain):
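        # Layout assumed here: each embedding row holds a 300-d general vector in
        # dims [:300] and, when args.use_domain_emb is set, a 100-d domain-specific
        # vector in dims [300:] (so args.emb_dim is 400 in that case).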

        print 'Loading pretrained general word embeddings and domain word embeddings ...'

        counter_gen = 0.
        pretrained_emb = open(emb_file_gen)
        for line in pretrained_emb:
            tokens = line.split()
            if len(tokens) != 301:
                continue
            word = tokens[0]
            vec = tokens[1:]
            try:
                emb_matrix[0][vocab[word]][:300] = vec
                counter_gen += 1
            except KeyError:
                pass

        if args.use_domain_emb:
            counter_domain = 0.
            pretrained_emb = open(emb_file_domain)
            for line in pretrained_emb:
                tokens = line.split()
                if len(tokens) != 101:
                    continue
                word = tokens[0]
                vec = tokens[1:]
                try:
                    emb_matrix[0][vocab[word]][300:] = vec
                    counter_domain += 1
                except KeyError:
                    pass

        pretrained_emb.close()
        logger.info(
            '%i/%i word vectors initialized by general embeddings (hit rate: %.2f%%)'
            % (counter_gen, len(vocab), 100 * counter_gen / len(vocab)))

        if args.use_domain_emb:
            logger.info(
                '%i/%i word vectors initialized by domain embeddings (hit rate: %.2f%%)'
                % (counter_domain, len(vocab),
                   100 * counter_domain / len(vocab)))

        return emb_matrix

    # Build model
    logger.info('Building model ...')
    print 'Building model ...'
    print '\n\n'

    vocab_size = len(vocab)

    ###################################
    # Inputs
    ###################################
    print 'Input layer'
    # sequence of token indices for aspect-level data
    sentence_input = Input(shape=(overall_maxlen, ),
                           dtype='int32',
                           name='sentence_input')
    # gold opinion label for aspect-level data.
    op_label_input = Input(shape=(overall_maxlen, 3),
                           dtype=K.floatx(),
                           name='op_label_input')
    # probability of sending gold opinion labels at opinion transmission step
    p_gold_op = Input(shape=(overall_maxlen, ),
                      dtype=K.floatx(),
                      name='p_gold_op')

    if args.use_doc:
        # doc_input_1 denotes the data for sentiment classification
        # doc_input_2 denotes the data for domain classification
        doc_input_1 = Input(shape=(doc_maxlen_1, ),
                            dtype='int32',
                            name='doc_input_1')
        doc_input_2 = Input(shape=(doc_maxlen_2, ),
                            dtype='int32',
                            name='doc_input_2')

    #########################################
    # Shared word embedding layer
    #########################################
    print 'Word embedding layer'
    word_emb = Embedding(vocab_size,
                         args.emb_dim,
                         mask_zero=True,
                         name='word_emb')

    # aspect-level inputs
    word_embeddings = word_emb(sentence_input)
    sentence_output = word_embeddings

    # doc-level inputs
    if args.use_doc:
        doc_output_1 = word_emb(doc_input_1)
        # we only use general embedding for domain classification
        doc_output_2 = word_emb(doc_input_2)
        if args.use_domain_emb:
            # mask out the domain embeddings
            doc_output_2 = Remove_domain_emb()(doc_output_2)

    ######################################
    # Shared CNN layers
    ######################################

    for i in xrange(args.shared_layers):
        print 'Shared CNN layer %s' % i
        sentence_output = Dropout(args.dropout_prob)(sentence_output)
        if args.use_doc:
            doc_output_1 = Dropout(args.dropout_prob)(doc_output_1)
            doc_output_2 = Dropout(args.dropout_prob)(doc_output_2)

        if i == 0:
            conv_1 = Conv1DWithMasking(filters=args.cnn_dim/2, kernel_size=3, \
              activation='relu', padding='same', kernel_initializer=my_init, name='cnn_0_1')
            conv_2 = Conv1DWithMasking(filters=args.cnn_dim/2, kernel_size=5, \
              activation='relu', padding='same', kernel_initializer=my_init, name='cnn_0_2')

            sentence_output_1 = conv_1(sentence_output)
            sentence_output_2 = conv_2(sentence_output)
            sentence_output = Concatenate()(
                [sentence_output_1, sentence_output_2])

            if args.use_doc:

                doc_output_1_1 = conv_1(doc_output_1)
                doc_output_1_2 = conv_2(doc_output_1)
                doc_output_1 = Concatenate()([doc_output_1_1, doc_output_1_2])

                doc_output_2_1 = conv_1(doc_output_2)
                doc_output_2_2 = conv_2(doc_output_2)
                doc_output_2 = Concatenate()([doc_output_2_1, doc_output_2_2])

        else:
            conv = Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
              activation='relu', padding='same', kernel_initializer=my_init, name='cnn_%s'%i)

            sentence_output = conv(sentence_output)

            if args.use_doc:
                doc_output_1 = conv(doc_output_1)
                doc_output_2 = conv(doc_output_2)

        word_embeddings = Concatenate()([word_embeddings, sentence_output])

    init_shared_features = sentence_output

    #######################################
    # Define task-specific layers
    #######################################

    # AE specific layers
    aspect_cnn = Sequential()
    for a in xrange(args.aspect_layers):
        print 'Aspect extraction layer %s' % a
        aspect_cnn.add(Dropout(args.dropout_prob))
        aspect_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
                  activation='relu', padding='same', kernel_initializer=my_init, name='aspect_cnn_%s'%a))
    aspect_dense = Dense(nb_class, activation='softmax', name='aspect_dense')

    # AS specific layers
    sentiment_cnn = Sequential()
    for b in xrange(args.senti_layers):
        print 'Sentiment classification layer %s' % b
        sentiment_cnn.add(Dropout(args.dropout_prob))
        sentiment_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
                  activation='relu', padding='same', kernel_initializer=my_init, name='sentiment_cnn_%s'%b))

    sentiment_att = Self_attention(args.use_opinion, name='sentiment_att')
    sentiment_dense = Dense(3, activation='softmax', name='sentiment_dense')

    if args.use_doc:
        # DS specific layers
        doc_senti_cnn = Sequential()
        for c in xrange(args.doc_senti_layers):
            print 'Document-level sentiment layers %s' % c
            doc_senti_cnn.add(Dropout(args.dropout_prob))
            doc_senti_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
                      activation='relu', padding='same', kernel_initializer=my_init, name='doc_sentiment_cnn_%s'%c))

        doc_senti_att = Attention(name='doc_senti_att')
        doc_senti_dense = Dense(3, name='doc_senti_dense')
        # The reason not to use the default softmax is that it reports errors when input_dims=2 due to
        # compatibility issues between the tf and keras versions used.
        softmax = Lambda(lambda x: K.tf.nn.softmax(x),
                         name='doc_senti_softmax')

        # DD specific layers
        doc_domain_cnn = Sequential()
        for d in xrange(args.doc_domain_layers):
            print 'Document-level domain layers %s' % d
            doc_domain_cnn.add(Dropout(args.dropout_prob))
            doc_domain_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
                      activation='relu', padding='same', kernel_initializer=my_init, name='doc_domain_cnn_%s'%d))

        doc_domain_att = Attention(name='doc_domain_att')
        doc_domain_dense = Dense(1,
                                 activation='sigmoid',
                                 name='doc_domain_dense')

    # re-encoding layer
    enc = Dense(args.cnn_dim, activation='relu', name='enc')

    ####################################################
    # aspect-level operations involving message passing
    ####################################################

    for i in xrange(args.interactions + 1):
        print 'Interaction number ', i
        aspect_output = sentence_output
        sentiment_output = sentence_output
        # note that the aspect-level data will also go through the doc-level models
        doc_senti_output = sentence_output
        doc_domain_output = sentence_output

        ### AE ###
        if args.aspect_layers > 0:
            aspect_output = aspect_cnn(aspect_output)
        # concatenate word embeddings and task-specific output for prediction
        aspect_output = Concatenate()([word_embeddings, aspect_output])
        aspect_output = Dropout(args.dropout_prob)(aspect_output)
        aspect_probs = aspect_dense(aspect_output)

        ### AS ###
        if args.senti_layers > 0:
            sentiment_output = sentiment_cnn(sentiment_output)

        sentiment_output = sentiment_att(
            [sentiment_output, op_label_input, aspect_probs, p_gold_op])
        sentiment_output = Concatenate()(
            [init_shared_features, sentiment_output])
        sentiment_output = Dropout(args.dropout_prob)(sentiment_output)
        sentiment_probs = sentiment_dense(sentiment_output)

        if args.use_doc:
            ### DS ###
            if args.doc_senti_layers > 0:
                doc_senti_output = doc_senti_cnn(doc_senti_output)
            # output attention weights with two activation functions
            senti_att_weights_softmax, senti_att_weights_sigmoid = doc_senti_att(
                doc_senti_output)
            # reshape the sigmoid attention weights, will be used in message passing
            senti_weights = Lambda(lambda x: K.expand_dims(x, axis=-1))(
                senti_att_weights_sigmoid)

            doc_senti_output = WeightedSum()(
                [doc_senti_output, senti_att_weights_softmax])
            doc_senti_output = Dropout(args.dropout_prob)(doc_senti_output)
            doc_senti_output = doc_senti_dense(doc_senti_output)
            doc_senti_probs = softmax(doc_senti_output)
            # reshape the doc-level sentiment predictions, will be used in message passing
            doc_senti_probs = Lambda(lambda x: K.expand_dims(x, axis=-2))(
                doc_senti_probs)
            doc_senti_probs = Lambda(lambda x: K.repeat_elements(
                x, overall_maxlen, axis=1))(doc_senti_probs)

            ### DD ###
            if args.doc_domain_layers > 0:
                doc_domain_output = doc_domain_cnn(doc_domain_output)
            domain_att_weights_softmax, domain_att_weights_sigmoid = doc_domain_att(
                doc_domain_output)
            domain_weights = Lambda(lambda x: K.expand_dims(x, axis=-1))(
                domain_att_weights_sigmoid)

            doc_domain_output = WeightedSum()(
                [doc_domain_output, domain_att_weights_softmax])
            doc_domain_output = Dropout(args.dropout_prob)(doc_domain_output)
            doc_domain_probs = doc_domain_dense(doc_domain_output)

            # update sentence_output for the next iteration
            sentence_output = Concatenate()([
                sentence_output, aspect_probs, sentiment_probs,
                doc_senti_probs, senti_weights, domain_weights
            ])

        else:
            # update sentence_output for the next iteration
            sentence_output = Concatenate()(
                [sentence_output, aspect_probs, sentiment_probs])

        sentence_output = enc(sentence_output)

    aspect_model = Model(inputs=[sentence_input, op_label_input, p_gold_op],
                         outputs=[aspect_probs, sentiment_probs])

    ####################################################
    # doc-level operations without message passing
    ####################################################

    if args.use_doc:
        if args.doc_senti_layers > 0:
            doc_output_1 = doc_senti_cnn(doc_output_1)
        att_1, _ = doc_senti_att(doc_output_1)
        doc_output_1 = WeightedSum()([doc_output_1, att_1])
        doc_output_1 = Dropout(args.dropout_prob)(doc_output_1)
        doc_output_1 = doc_senti_dense(doc_output_1)
        doc_prob_1 = softmax(doc_output_1)

        if args.doc_domain_layers > 0:
            doc_output_2 = doc_domain_cnn(doc_output_2)
        att_2, _ = doc_domain_att(doc_output_2)
        doc_output_2 = WeightedSum()([doc_output_2, att_2])
        doc_output_2 = Dropout(args.dropout_prob)(doc_output_2)
        doc_prob_2 = doc_domain_dense(doc_output_2)

        doc_model = Model(inputs=[doc_input_1, doc_input_2],
                          outputs=[doc_prob_1, doc_prob_2])

    else:
        doc_model = None

    ####################################################
    # initialize word embeddings
    ####################################################

    logger.info('Initializing lookup table')

    # Load pre-trained word vectors.
    # To save loading time, here we load from extracted subsets of the original embeddings,
    # which contain only the embeddings of words in the vocab.
    if args.use_doc:
        emb_path_gen = '../glove/%s_.txt' % (args.domain)
        emb_path_domain = '../domain_specific_emb/%s_.txt' % (args.domain)
    else:
        emb_path_gen = '../glove/%s.txt' % (args.domain)
        emb_path_domain = '../domain_specific_emb/%s.txt' % (args.domain)

    # Load pre-trained word vectors from the original large files.
    # If you are loading from an SSD, the process only takes 1-2 minutes.
    # If you are loading from an HDD, the process takes a few hours on the first try,
    # and 1-2 minutes in subsequent runs (due to caching).

    # emb_path_gen = '../glove.840B.300d.txt'
    # if args.domain == 'lt':
    #     emb_path_domain = '../laptop_emb.vec'
    # else:
    #     emb_path_domain = '../restaurant_emb.vec'

    aspect_model.get_layer('word_emb').set_weights(
        init_emb(
            aspect_model.get_layer('word_emb').get_weights(), vocab,
            emb_path_gen, emb_path_domain))

    logger.info('  Done')

    return aspect_model, doc_model
Example #10
def create_model(args, vocab, nb_class, overall_maxlen, doc_maxlen_1,
                 doc_maxlen_2, num_relations):

    # Function that initializes word embeddings
    def init_emb(emb_matrix, vocab, emb_file_gen, emb_file_domain):

        print 'Loading pretrained general word embeddings and domain word embeddings ...'

        counter_gen = 0.
        pretrained_emb = open(emb_file_gen)
        for line in pretrained_emb:
            tokens = line.split()
            if len(tokens) != 301:
                continue
            word = tokens[0]
            vec = tokens[1:]
            try:
                emb_matrix[0][vocab[word]][:300] = vec
                counter_gen += 1
            except KeyError:
                pass

        if args.use_domain_emb:
            counter_domain = 0.
            pretrained_emb = open(emb_file_domain)
            for line in pretrained_emb:
                tokens = line.split()
                if len(tokens) != 101:
                    continue
                word = tokens[0]
                vec = tokens[1:]
                try:
                    emb_matrix[0][vocab[word]][300:] = vec
                    counter_domain += 1
                except KeyError:
                    pass

        pretrained_emb.close()
        logger.info(
            '%i/%i word vectors initialized by general embeddings (hit rate: %.2f%%)'
            % (counter_gen, len(vocab), 100 * counter_gen / len(vocab)))

        if args.use_domain_emb:
            logger.info(
                '%i/%i word vectors initialized by domain embeddings (hit rate: %.2f%%)'
                % (counter_domain, len(vocab),
                   100 * counter_domain / len(vocab)))

        return emb_matrix

    # Build model
    logger.info('Building model ...')
    print 'Building model ...'
    print '\n\n'

    vocab_size = len(vocab)

    ###################################
    # Inputs
    ###################################
    print 'Input layer'
    # sequence of token indices for aspect-level data
    sentence_input = Input(shape=(overall_maxlen, ),
                           dtype='int32',
                           name='sentence_input')
    A_in = [
        Input(shape=(overall_maxlen, overall_maxlen), name='A_in%s' % i)
        for i in range(num_relations)
    ]
    # gold opinion label for aspect-level data.
    op_label_input = Input(shape=(overall_maxlen, 3),
                           dtype=K.floatx(),
                           name='op_label_input')
    # probability of sending gold opinion labels at opinion transmission step
    p_gold_op = Input(shape=(overall_maxlen, ),
                      dtype=K.floatx(),
                      name='p_gold_op')
    mask = K.not_equal(sentence_input, 0)

    if args.use_doc:
        # doc_input_1 denotes the data for sentiment classification
        # doc_input_2 denotes the data for domain classification
        doc_input_1 = Input(shape=(doc_maxlen_1, ),
                            dtype='int32',
                            name='doc_input_1')
        doc_input_2 = Input(shape=(doc_maxlen_2, ),
                            dtype='int32',
                            name='doc_input_2')

    if args.use_bert:
        if args.bert_type == 'base':
            hs = 768
        bert_input = Input(shape=(overall_maxlen + 1, hs),
                           dtype=K.floatx(),
                           name='bert_input')  # +1 denote +cls

    #########################################
    # Shared word embedding layer
    #########################################
    print 'Word embedding layer'
    word_emb = Embedding(vocab_size,
                         args.emb_dim,
                         mask_zero=True,
                         name='word_emb')

    # aspect-level inputs
    word_embeddings = word_emb(sentence_input)
    sentence_output = word_embeddings

    # doc-level inputs
    if args.use_doc:
        doc_output_1 = word_emb(doc_input_1)
        # we only use general embedding for domain classification
        doc_output_2 = word_emb(doc_input_2)
        if args.use_domain_emb:
            # mask out the domain embeddings
            doc_output_2 = Remove_domain_emb()(doc_output_2)

    def slice(x, index):
        return x[:, index, :]

    def slice1(x, index):
        return x[:, index:, :]

    expand_dim = Lambda(lambda x: K.expand_dims(x, axis=1))
    if args.use_bert:
        #code.interact(local=locals())
        bert_inp = Lambda(slice1, arguments={'index': 1})(bert_input)
        bert_cls = Lambda(slice, arguments={'index': 0})(bert_input)
        sentence_output = Concatenate()([sentence_output, bert_inp])
        #       if args.use_bert_cls:
        #code.interact(local=locals())
        #bert_cls = bert_input[:,0,:]
        node_num = sentence_output.shape.as_list()[1]
        bert_cls1 = expand_dim(bert_cls)
        bert_cls = Lambda(lambda x: K.tile(x, [1, node_num, 1]))(bert_cls1)

    if args.use_bert_cls == 0 and args.use_bert:
        sentence_output = Concatenate()([sentence_output, bert_cls])
    ######################################
    # Shared GCN + CNN layers
    ######################################
#    iter_gcn = Sequential()
#    iter_gcn.add(SpectralGraphConvolution(150, activation='relu',name='GCN'))
    expand_dim = Lambda(lambda x: K.expand_dims(x, axis=1))
    share_gcn_dense = Dense(300, activation='relu', name='share_gcn_dense')
    for i in xrange(args.shared_layers):
        print 'Shared GCN layer %s' % i
        sentence_output = Dropout(args.dropout_prob)(sentence_output)
        if args.use_doc:
            doc_output_1 = Dropout(args.dropout_prob)(doc_output_1)
            doc_output_2 = Dropout(args.dropout_prob)(doc_output_2)

        if i == 0:
            gcn_0 = SpectralGraphConvolution(args.gcn_dim,
                                             args.relation_dim,
                                             activation='relu',
                                             name='GCN_0')
            # conv_2 = SpectralGraphConvolution(args.cnn_dim, activation='relu',name='GCN_0_2')
            #expand_dim = Lambda(lambda x: K.expand_dims(x, axis = 1))

            sentence_output_0 = gcn_0([sentence_output] + A_in)
            H = Dropout(args.dropout_prob)(sentence_output_0)

            Global_graph = MyMeanPool(axis=1, smask=mask)(H)
            node_num = H.shape.as_list()[1]
            Gg = expand_dim(Global_graph)
            GG = Lambda(lambda x: K.tile(x, [1, node_num, 1]))(Gg)
            # code.interact(local=locals())
            # HG = Concatenate()([H, GG])
            # sentence_output = HG
            if args.use_cnn:
                conv_1 = Conv1DWithMasking(filters=args.cnn_dim, kernel_size=3, \
                activation='relu', padding='same', kernel_initializer=my_init, name='CNN_0_1')
                conv_2 = Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
                activation='relu', padding='same', kernel_initializer=my_init, name='CNN_0_2')
                sentence_output_1 = conv_1(sentence_output)
                sentence_output_2 = conv_2(sentence_output)

                if args.use_meanpool:
                    sentence_output = Concatenate()(
                        [sentence_output_1, sentence_output_2, GG, H])
                else:
                    sentence_output = Concatenate()(
                        [sentence_output_1, sentence_output_2, H])
            else:
                if args.use_meanpool:
                    sentence_output = Concatenate()([GG, H])
                else:
                    sentence_output = H

            if args.use_bert_cls == 1 and args.use_bert:
                sentence_output = Concatenate()([sentence_output, bert_cls])

            if args.use_doc:

                doc_output_1_1 = conv_1(doc_output_1)
                doc_output_1_2 = conv_2(doc_output_1)
                doc_output_1 = Concatenate()([doc_output_1_1, doc_output_1_2])

                doc_output_2_1 = conv_1(doc_output_2)
                doc_output_2_2 = conv_2(doc_output_2)
                doc_output_2 = Concatenate()([doc_output_2_1, doc_output_2_2])
            if args.shared_layers == 1:
                sentence_output = share_gcn_dense(sentence_output)

        else:
            # conv = Conv1DWithMasking(filters=args.cnn_dim/3, kernel_size=5, \
            #   activation='relu', padding='same', kernel_initializer=my_init, name='CNN_%s'%i)
            gcn = SpectralGraphConvolution(args.gcn_dim,
                                           args.relation_dim,
                                           activation='relu',
                                           name='GCN_%s' % i)
            #expand_dim = Lambda(lambda x: K.expand_dims(x, axis = 1))

            # sentence_output1 = conv(sentence_output)
            sentence_output2 = gcn([sentence_output] + A_in)
            H = Dropout(args.dropout_prob)(sentence_output2)

            Global_graph = MyMeanPool(axis=1, smask=mask)(H)
            node_num = H.shape.as_list()[1]
            Gg = expand_dim(Global_graph)
            GG = Lambda(lambda x: K.tile(x, [1, node_num, 1]))(Gg)
            # code.interact(local=locals())
            if args.use_cnn:
                conv = Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
                  activation='relu', padding='same', kernel_initializer=my_init, name='CNN_%s'%i)
                sentence_output_1 = conv(sentence_output)
                # sentence_output = Concatenate()([sentence_output_1, GG, H])
                if args.use_meanpool:
                    sentence_output = Concatenate()([sentence_output_1, GG, H])
                else:
                    sentence_output = Concatenate()([sentence_output_1, H])
            else:
                if args.use_meanpool:
                    sentence_output = Concatenate()([GG, H])
                else:
                    sentence_output = H

            if args.use_bert_cls == 2 and args.use_bert:
                sentence_output = Concatenate()([sentence_output, bert_cls])

            sentence_output = share_gcn_dense(sentence_output)

            if args.use_doc:
                doc_output_1 = conv(doc_output_1)
                doc_output_2 = conv(doc_output_2)

        word_embeddings = Concatenate()([word_embeddings, sentence_output])

    init_shared_features = sentence_output

    #######################################
    # Define task-specific layers
    #######################################

    # AE specific layers
    aspect_cnn = Sequential()
    for a in xrange(args.aspect_layers):
        print 'Aspect extraction layer %s' % a
        aspect_cnn.add(Dropout(args.dropout_prob))
        aspect_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
                  activation='relu', padding='same', kernel_initializer=my_init, name='aspect_cnn_%s'%a))
    aspect_dense = Dense(nb_class, activation='softmax', name='aspect_dense')
    aspect_dense_ = Dense(nb_class, name='aspect_dense_')

    # AS specific layers
    sentiment_cnn = Sequential()
    for b in xrange(args.senti_layers):
        print 'Sentiment classification layer %s' % b
        sentiment_cnn.add(Dropout(args.dropout_prob))
        sentiment_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
                  activation='relu', padding='same', kernel_initializer=my_init, name='sentiment_cnn_%s'%b))

    sentiment_att = Self_attention(args.use_opinion, name='sentiment_att')
    sentiment_dense = Dense(3, activation='softmax', name='sentiment_dense')
    # sentiment_dense_ = Dense(3, name='sentiment_dense_')

    if args.use_doc:
        # DS specific layers
        doc_senti_cnn = Sequential()
        for c in xrange(args.doc_senti_layers):
            print 'Document-level sentiment layers %s' % c
            doc_senti_cnn.add(Dropout(args.dropout_prob))
            doc_senti_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
                      activation='relu', padding='same', kernel_initializer=my_init, name='doc_sentiment_cnn_%s'%c))

        doc_senti_att = Attention(name='doc_senti_att')
        doc_senti_dense = Dense(3, name='doc_senti_dense')
        # The reason not to use the default softmax is that it reports errors when input_dims=2 due to
        # compatibility issues between the tf and keras versions used.
        softmax = Lambda(lambda x: K.tf.nn.softmax(x),
                         name='doc_senti_softmax')

        # DD specific layers
        doc_domain_cnn = Sequential()
        for d in xrange(args.doc_domain_layers):
            print 'Document-level domain layers %s' % d
            doc_domain_cnn.add(Dropout(args.dropout_prob))
            doc_domain_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
                      activation='relu', padding='same', kernel_initializer=my_init, name='doc_domain_cnn_%s'%d))

        doc_domain_att = Attention(name='doc_domain_att')
        doc_domain_dense = Dense(1,
                                 activation='sigmoid',
                                 name='doc_domain_dense')

    # re-encoding layer
    enc = Dense(300, activation='relu', name='enc')

    ####################################################
    # aspect-level operations involving message passing
    ####################################################

    for i in xrange(args.interactions + 1):
        print 'Interaction number ', i
        aspect_output = sentence_output
        sentiment_output = sentence_output
        # note that the aspect-level data will also go through the doc-level models
        doc_senti_output = sentence_output
        doc_domain_output = sentence_output

        ### AE ###
        if args.aspect_layers > 0:
            aspect_output = aspect_cnn(aspect_output)
        # concatenate word embeddings and task-specific output for prediction
        aspect_output = Concatenate()([word_embeddings, aspect_output])
        if args.use_bert_cls == 3 and args.use_bert:
            aspect_output = Concatenate()([aspect_output, bert_cls])
        aspect_output = Dropout(args.dropout_prob)(aspect_output)
        aspect_probs = aspect_dense(aspect_output)

        use_crf = 0
        if use_crf:
            aspect_prob = aspect_dense_(aspect_output)
            aspect_crf = CRF(nb_class, sparse_target=True)  # False
            aspect_crf_output = aspect_crf(aspect_prob)
        ### AS ###
        if args.senti_layers > 0:
            sentiment_output = sentiment_cnn(sentiment_output)

        sentiment_output = sentiment_att(
            [sentiment_output, op_label_input, aspect_probs, p_gold_op])
        sentiment_output = Concatenate()(
            [init_shared_features, sentiment_output])
        if args.use_bert_cls == 3 and args.use_bert:
            sentiment_output = Concatenate()([sentiment_output, bert_cls])
        sentiment_output = Dropout(args.dropout_prob)(sentiment_output)
        sentiment_probs = sentiment_dense(sentiment_output)
        # use_crf = 0
        # if use_crf:
        #     sentiment_prob = sentiment_dense_(sentiment_output)
        #     senti_crf = CRF(3, sparse_target=False) # False
        #     senti_crf_output = senti_crf(sentiment_prob)

        if args.use_doc:
            ### DS ###
            if args.doc_senti_layers > 0:
                doc_senti_output = doc_senti_cnn(doc_senti_output)
            # output attention weights with two activation functions
            senti_att_weights_softmax, senti_att_weights_sigmoid = doc_senti_att(
                doc_senti_output)
            # reshape the sigmoid attention weights, will be used in message passing
            senti_weights = Lambda(lambda x: K.expand_dims(x, axis=-1))(
                senti_att_weights_sigmoid)

            doc_senti_output = WeightedSum()(
                [doc_senti_output, senti_att_weights_softmax])
            doc_senti_output = Dropout(args.dropout_prob)(doc_senti_output)
            doc_senti_output = doc_senti_dense(doc_senti_output)
            doc_senti_probs = softmax(doc_senti_output)
            # reshape the doc-level sentiment predictions, will be used in message passing
            doc_senti_probs = Lambda(lambda x: K.expand_dims(x, axis=-2))(
                doc_senti_probs)
            doc_senti_probs = Lambda(lambda x: K.repeat_elements(
                x, overall_maxlen, axis=1))(doc_senti_probs)

            ### DD ###
            if args.doc_domain_layers > 0:
                doc_domain_output = doc_domain_cnn(doc_domain_output)
            domain_att_weights_softmax, domain_att_weights_sigmoid = doc_domain_att(
                doc_domain_output)
            domain_weights = Lambda(lambda x: K.expand_dims(x, axis=-1))(
                domain_att_weights_sigmoid)

            doc_domain_output = WeightedSum()(
                [doc_domain_output, domain_att_weights_softmax])
            doc_domain_output = Dropout(args.dropout_prob)(doc_domain_output)
            doc_domain_probs = doc_domain_dense(doc_domain_output)

            # update sentence_output for the next iteration
            sentence_output = Concatenate()([
                sentence_output, aspect_probs, sentiment_probs,
                doc_senti_probs, senti_weights, domain_weights
            ])

        else:
            # update sentence_output for the next iteration
            #sentence_output = Concatenate()([sentence_output, aspect_probs, sentiment_probs])
            if args.use_prob:
                sentence_output = Concatenate()(
                    [sentence_output, aspect_probs, sentiment_probs])
            else:
                sentence_output = Concatenate()(
                    [sentence_output, aspect_output, sentiment_output])
        if args.use_bert_cls == 4 and args.use_bert:
            sentence_output = Concatenate()([sentence_output, bert_cls])
        sentence_output = enc(sentence_output)
    use_crf = 0
    if use_crf:
        aspect_prob = aspect_dense_(aspect_output)
        aspect_crf = CRF(nb_class, sparse_target=True)  # False
        aspect_crf_output = aspect_crf(aspect_prob)
        aspect_model = Model(inputs=[sentence_input] + A_in +
                             [op_label_input] + [p_gold_op],
                             outputs=[aspect_crf_output, sentiment_probs])
    else:
        if args.use_bert:
            aspect_model = Model(inputs=[sentence_input] + A_in +
                                 [op_label_input] + [p_gold_op] + [bert_input],
                                 outputs=[aspect_probs, sentiment_probs])
        else:
            aspect_model = Model(inputs=[sentence_input] + A_in +
                                 [op_label_input] + [p_gold_op],
                                 outputs=[aspect_probs, sentiment_probs])

    ####################################################
    # doc-level operations without message passing
    ####################################################

    if args.use_doc:
        if args.doc_senti_layers > 0:
            doc_output_1 = doc_senti_cnn(doc_output_1)
        att_1, _ = doc_senti_att(doc_output_1)
        doc_output_1 = WeightedSum()([doc_output_1, att_1])
        doc_output_1 = Dropout(args.dropout_prob)(doc_output_1)
        doc_output_1 = doc_senti_dense(doc_output_1)
        doc_prob_1 = softmax(doc_output_1)

        if args.doc_domain_layers > 0:
            doc_output_2 = doc_domain_cnn(doc_output_2)
        att_2, _ = doc_domain_att(doc_output_2)
        doc_output_2 = WeightedSum()([doc_output_2, att_2])
        doc_output_2 = Dropout(args.dropout_prob)(doc_output_2)
        doc_prob_2 = doc_domain_dense(doc_output_2)

        doc_model = Model(inputs=[doc_input_1, doc_input_2],
                          outputs=[doc_prob_1, doc_prob_2])

    else:
        doc_model = None

    ####################################################
    # initialize word embeddings
    ####################################################

    logger.info('Initializing lookup table')

    # Load pre-trained word vectors.
    # To save loading time, we load from extracted subsets of the original embeddings,
    # which only contain the embeddings of words in the vocab.
    if args.use_doc:
        emb_path_gen = '../glove/%s_.txt' % (args.domain)
        emb_path_domain = '../domain_specific_emb/%s_.txt' % (args.domain)
    else:
        emb_path_gen = '../glove/%s.txt' % (args.domain)
        emb_path_domain = '../domain_specific_emb/%s.txt' % (args.domain)

    # Load pre-trained word vectors from the original large files.
    # When loading from an SSD, the process only takes 1-2 minutes.
    # When loading from an HDD, the first run can take a few hours, but subsequent runs
    # take 1-2 minutes thanks to the OS file cache.

    # emb_path_gen = '../glove.840B.300d.txt'
    # if args.domain == 'lt':
    #     emb_path_domain = '../laptop_emb.vec'
    # else:
    #     emb_path_domain = '../restaurant_emb.vec'

    aspect_model.get_layer('word_emb').set_weights(
        init_emb(
            aspect_model.get_layer('word_emb').get_weights(), vocab,
            emb_path_gen, emb_path_domain))

    logger.info('  Done')
    ## Optimizer algorithm

    from optimizers import get_optimizer

    optimizer = get_optimizer(args)
    if args.use_doc == 1 and args.interactions > 0:
        # fix the document-specific parameters when updating aspect model
        aspect_model.get_layer('doc_senti_att').trainable = False
        aspect_model.get_layer('doc_senti_dense').trainable = False
        aspect_model.get_layer('doc_domain_att').trainable = False

    if use_crf:
        aspect_model.compile(
            optimizer=optimizer,
            loss=[aspect_crf.loss_function, 'categorical_crossentropy'],
            loss_weights=[1., 1.])
    else:
        aspect_model.compile(
            optimizer=optimizer,
            loss=['categorical_crossentropy', 'categorical_crossentropy'],
            loss_weights=[1., 1.])
    #aspect_model.summary()

    if args.use_doc == 1:
        doc_model.get_layer('doc_senti_att').trainable = True
        doc_model.get_layer('doc_senti_dense').trainable = True
        doc_model.get_layer('doc_domain_att').trainable = True
        doc_model.get_layer('doc_domain_dense').trainable = True

    if args.use_doc:
        doc_model.compile(
            optimizer=optimizer,
            loss=['categorical_crossentropy', 'binary_crossentropy'],
            loss_weights=[1., 1.],
            metrics=['categorical_accuracy', 'accuracy'])

    return aspect_model, doc_model
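
The message-passing step above turns the document-level predictions into per-token signals with
K.expand_dims followed by K.repeat_elements. A minimal NumPy sketch of the same reshaping
(shapes and values are illustrative only, not taken from the model):

import numpy as np

doc_senti_probs = np.array([[0.7, 0.2, 0.1],
                            [0.1, 0.3, 0.6]])            # (batch, 3)
overall_maxlen = 4                                       # illustrative sentence length
expanded = np.expand_dims(doc_senti_probs, axis=-2)      # (batch, 1, 3)
broadcast = np.repeat(expanded, overall_maxlen, axis=1)  # (batch, overall_maxlen, 3)
print(broadcast.shape)                                   # (2, 4, 3)
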
Exemplo n.º 11
def create_model(args, vocab, nb_class, overall_maxlen, doc_maxlen_1,
                 doc_maxlen_2):

    # Function that initializes word embeddings
    def init_emb(emb_matrix, vocab, emb_file_gen, emb_file_domain):

        print 'Loading pretrained general word embeddings and domain word embeddings ...'

        counter_gen = 0.
        pretrained_emb = open(emb_file_gen)
        for line in pretrained_emb:
            tokens = line.split()
            if len(tokens) != 301:
                continue
            word = tokens[0]
            vec = tokens[1:]
            try:
                emb_matrix[0][vocab[word]][:300] = vec
                counter_gen += 1
            except KeyError:
                pass
        # close the general-embedding file before (possibly) reusing the handle for the domain file
        pretrained_emb.close()

        if args.use_domain_emb:
            counter_domain = 0.
            pretrained_emb = open(emb_file_domain)
            for line in pretrained_emb:
                tokens = line.split()
                if len(tokens) != 101:
                    continue
                word = tokens[0]
                vec = tokens[1:]
                try:
                    emb_matrix[0][vocab[word]][300:] = vec
                    counter_domain += 1
                except KeyError:
                    pass

        pretrained_emb.close()
        logger.info(
            '%i/%i word vectors initialized by general embeddings (hit rate: %.2f%%)'
            % (counter_gen, len(vocab), 100 * counter_gen / len(vocab)))

        if args.use_domain_emb:
            logger.info(
                '%i/%i word vectors initialized by domain embeddings (hit rate: %.2f%%)'
                % (counter_domain, len(vocab),
                   100 * counter_domain / len(vocab)))

        return emb_matrix

    # Build model
    logger.info('Building model ...')
    print 'Building model ...'
    print '\n\n'

    vocab_size = len(vocab)

    ###################################
    # Inputs
    ###################################
    print 'Input layer'
    # sequence of token indices for aspect-level data
    sentence_input = Input(shape=(overall_maxlen, ),
                           dtype='int32',
                           name='sentence_input')
    # gold opinion label for aspect-level data.
    op_label_input = Input(shape=(overall_maxlen, 3),
                           dtype=K.floatx(),
                           name='op_label_input')
    # probability of sending gold opinion labels at opinion transmission step
    p_gold_op = Input(shape=(overall_maxlen, ),
                      dtype=K.floatx(),
                      name='p_gold_op')

    A_in = Input(shape=(overall_maxlen, overall_maxlen),
                 dtype=K.floatx(),
                 name='A_input')

    if args.use_doc:
        # doc_input_1 denotes the data for sentiment classification
        # doc_input_2 denotes the data for domain classification
        doc_input_1 = Input(shape=(doc_maxlen_1, ),
                            dtype='int32',
                            name='doc_input_1')
        doc_input_2 = Input(shape=(doc_maxlen_2, ),
                            dtype='int32',
                            name='doc_input_2')

    if args.use_bert:
        bert_input = Input(shape=(overall_maxlen + 1, 768),
                           dtype=K.floatx(),
                           name='bert_input')  # the +1 accounts for the [CLS] vector
    #########################################
    # Shared word embedding layer
    #########################################
    print 'Word embedding layer'
    word_emb = Embedding(vocab_size,
                         args.emb_dim,
                         mask_zero=True,
                         name='word_emb')

    # aspect-level inputs
    word_embeddings = word_emb(sentence_input)
    sentence_output = word_embeddings

    # doc-level inputs
    if args.use_doc:
        doc_output_1 = word_emb(doc_input_1)
        # we only use general embedding for domain classification
        doc_output_2 = word_emb(doc_input_2)
        if args.use_domain_emb:
            # mask out the domain embeddings
            doc_output_2 = Remove_domain_emb()(doc_output_2)

    # slice(x, 0) extracts the [CLS] vector from the BERT input
    def slice(x, index):
        return x[:, index, :]

    # slice1(x, 1) drops the [CLS] position and keeps the per-token vectors
    def slice1(x, index):
        return x[:, index:, :]

    expand_dim = Lambda(lambda x: K.expand_dims(x, axis=1))
    if args.use_bert:
        #code.interact(local=locals())
        bert_inp = Lambda(slice1, arguments={'index': 1})(bert_input)
        bert_cls = Lambda(slice, arguments={'index': 0})(bert_input)
        #sentence_output = Concatenate()([sentence_output, bert_inp])
        #       if args.use_bert_cls:
        #code.interact(local=locals())
        #bert_cls = bert_input[:,0,:]
        node_num = sentence_output.shape.as_list()[1]
        bert_cls1 = expand_dim(bert_cls)
        bert_cls = Lambda(lambda x: K.tile(x, [1, node_num, 1]))(bert_cls1)
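        # bert_cls is now (batch, node_num, 768): one copy of the [CLS] vector per token position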

    from my_layers_algo import DigiCaps, Length, Capsule

    if args.use_bert_cls == 0 and args.use_bert:
        sentence_output = Concatenate()([sentence_output, bert_cls])

    ######################################
    # Shared CNN layers
    ######################################

    for i in xrange(args.shared_layers):
        print 'Shared CNN layer %s' % i
        sentence_output = Dropout(args.dropout_prob)(sentence_output)
        if args.use_doc:
            doc_output_1 = Dropout(args.dropout_prob)(doc_output_1)
            doc_output_2 = Dropout(args.dropout_prob)(doc_output_2)

        if i == 0:
            #conv_0 = Conv1DWithMasking(filters=args.cnn_dim/2, kernel_size=2, \
            #  activation='relu', padding='same', kernel_initializer=my_init, name='cnn_0_0')
            conv_1 = Conv1DWithMasking(filters=args.cnn_dim/2, kernel_size=3, \
              activation='relu', padding='same', kernel_initializer=my_init, name='cnn_0_1')
            #conv_2 = Conv1DWithMasking(filters=args.cnn_dim/2, kernel_size=4, \
            #  activation='relu', padding='same', kernel_initializer=my_init, name='cnn_0_2')
            conv_3 = Conv1DWithMasking(filters=args.cnn_dim/2, kernel_size=5, \
              activation='relu', padding='same', kernel_initializer=my_init, name='cnn_0_3')

            #sentence_output_0 = conv_0(sentence_output)
            sentence_output_1 = conv_1(sentence_output)
            #sentence_output_2 = conv_2(sentence_output)
            sentence_output_3 = conv_3(sentence_output)
            #sentence_output = Concatenate()([sentence_output_0, sentence_output_1, sentence_output_2, sentence_output_3])
            sentence_output = Concatenate()(
                [sentence_output_1, sentence_output_3])

            if args.use_doc:

                #doc_output_1_0 = conv_0(doc_output_1)
                doc_output_1_1 = conv_1(doc_output_1)
                #doc_output_1_2 = conv_2(doc_output_1)
                doc_output_1_3 = conv_3(doc_output_1)
                #doc_output_1 = Concatenate()([doc_output_1_0, doc_output_1_1, doc_output_1_2, doc_output_1_3])
                doc_output_1 = Concatenate()([doc_output_1_1, doc_output_1_3])

                #doc_output_2_0 = conv_0(doc_output_2)
                doc_output_2_1 = conv_1(doc_output_2)
                #doc_output_2_2 = conv_2(doc_output_2)
                doc_output_2_3 = conv_3(doc_output_2)
                #doc_output_2 = Concatenate()([doc_output_2_0, doc_output_2_1, doc_output_2_2, doc_output_2_3])
                doc_output_2 = Concatenate()([doc_output_2_1, doc_output_2_3])

        else:
            #conv = Conv1DWithMasking(filters=args.cnn_dim, kernel_size=3, \
            #  activation='relu', padding='same', kernel_initializer=my_init, name='cnn_3_%s'%i)
            conv_ = Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
              activation='relu', padding='same', kernel_initializer=my_init, name='cnn_5_%s'%i)
            #sentence_output1 = conv(sentence_output)
            sentence_output = conv_(sentence_output)
            #sentence_output = Concatenate()([sentence_output1, sentence_output2])

            if args.use_doc:
                doc_output_1 = conv_(doc_output_1)

                doc_output_2 = conv_(doc_output_2)

        word_embeddings = Concatenate()([word_embeddings, sentence_output])

    init_shared_features = sentence_output

    #######################################
    # Define task-specific layers
    #######################################
    #if args.which_dual == 'dual':
    #   from my_layers import Conv1DWithMasking, Remove_domain_emb, Self_attention, Attention, WeightedSum, Dual_attention
    #else:
    #     from my_layers_algo import Conv1DWithMasking, Remove_domain_emb, Self_attention, Attention, WeightedSum, Dual_attention
    # AE specific layers
    aspect_cnn = Sequential()
    for a in xrange(args.aspect_layers):
        print 'Aspect extraction layer %s' % a
        aspect_cnn.add(Dropout(args.dropout_prob))
        aspect_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
                  activation='relu', padding='same', kernel_initializer=my_init, name='aspect_cnn_%s'%a))
    aspect_dense = Dense(nb_class, activation='softmax', name='aspect_dense')

    # OE specific layers
    opinion_cnn = Sequential()
    for a in xrange(args.opinion_layers):
        print 'Opinion extraction layer %s' % a
        opinion_cnn.add(Dropout(args.dropout_prob))
        opinion_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
                  activation='relu', padding='same', kernel_initializer=my_init, name='opinion_cnn_%s'%a))
    opinion_dense = Dense(nb_class, activation='softmax', name='opinion_dense')

    # AS specific layers
    sentiment_cnn = Sequential()
    for b in xrange(args.senti_layers):
        print 'Sentiment classification layer %s' % b
        sentiment_cnn.add(Dropout(args.dropout_prob))
        sentiment_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
                  activation='relu', padding='same', kernel_initializer=my_init, name='sentiment_cnn_%s'%b))

    sentiment_att = Self_attention(args.use_opinion, name='sentiment_att')
    sentiment_dense = Dense(3, activation='softmax', name='sentiment_dense')

    aspect_dual_att = Dual_attention(name='aspect_dualatt')
    opinion_dual_att = Dual_attention(name='opinion_dualatt')
    sentiment_dual_att = Dual_attention(name='sentiment_dualatt')

    asp_caps = Capsule(num_capsule=overall_maxlen,
                       A=A_in,
                       dim_capsule=args.capsule_dim,
                       routings=3,
                       name='asp_caps')
    senti_caps = Capsule(num_capsule=overall_maxlen,
                         A=A_in,
                         dim_capsule=args.capsule_dim,
                         routings=3,
                         name='senti_caps')
    opin_caps = Capsule(num_capsule=overall_maxlen,
                        A=A_in,
                        dim_capsule=args.capsule_dim,
                        routings=3,
                        name='opin_caps')

    #probs = Length(name='out_caps')
    if args.use_doc:
        # DS specific layers
        doc_senti_cnn = Sequential()
        for c in xrange(args.doc_senti_layers):
            print 'Document-level sentiment layers %s' % c
            doc_senti_cnn.add(Dropout(args.dropout_prob))
            doc_senti_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
                      activation='relu', padding='same', kernel_initializer=my_init, name='doc_sentiment_cnn_%s'%c))

        doc_senti_att = Attention(name='doc_senti_att')
        doc_senti_dense = Dense(3, name='doc_senti_dense')
        # The default softmax activation is not used here because it raises errors when
        # input_dims=2, due to compatibility issues between the tf and keras versions used.
        softmax = Lambda(lambda x: K.tf.nn.softmax(x),
                         name='doc_senti_softmax')

        # DD specific layers
        doc_domain_cnn = Sequential()
        for d in xrange(args.doc_domain_layers):
            print 'Document-level domain layers %s' % d
            doc_domain_cnn.add(Dropout(args.dropout_prob))
            doc_domain_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
                      activation='relu', padding='same', kernel_initializer=my_init, name='doc_domain_cnn_%s'%d))

        doc_domain_att = Attention(name='doc_domain_att')
        doc_domain_dense = Dense(1,
                                 activation='sigmoid',
                                 name='doc_domain_dense')
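        # DS predicts a 3-way document sentiment distribution, while DD is a binary
        # domain classifier with a single sigmoid unit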

    # re-encoding layer
    enc = Dense(args.cnn_dim, activation='relu', name='enc')
    enc_a = Dense(args.cnn_dim, activation='relu', name='enc_a')
    enc_o = Dense(args.cnn_dim, activation='relu', name='enc_o')
    enc_s = Dense(args.cnn_dim, activation='relu', name='enc_s')
    enc_d = Dense(args.cnn_dim, activation='relu', name='enc_d')

    ####################################################
    # aspect-level operations involving message passing
    ####################################################
    print(sentence_output)
    #    sentence_output = enc(sentence_output)
    aspect_output = sentence_output
    opinion_output = sentence_output
    sentiment_output = sentence_output

    doc_senti_output = sentence_output
    doc_domain_output = sentence_output
    for i in xrange(args.interactions + 1):
        print 'Interaction number ', i
        if args.use_doc:
            ### DS ###
            if args.doc_senti_layers > 0:
                doc_senti_output = doc_senti_cnn(doc_senti_output)
            # output attention weights with two activation functions
            senti_att_weights_softmax, senti_att_weights_sigmoid = doc_senti_att(
                doc_senti_output)

            # reshape the sigmoid attention weights; they will be used in message passing
            senti_weights = Lambda(lambda x: K.expand_dims(x, axis=-1))(
                senti_att_weights_sigmoid)
            doc_senti_output1 = WeightedSum()(
                [doc_senti_output, senti_att_weights_softmax])
            doc_senti_output1 = Dropout(args.dropout_prob)(doc_senti_output1)
            doc_senti_output1 = doc_senti_dense(doc_senti_output1)
            doc_senti_probs = softmax(doc_senti_output1)
            # reshape the doc-level sentiment predictions; they will be used in message passing
            doc_senti_probs = Lambda(lambda x: K.expand_dims(x, axis=-2))(
                doc_senti_probs)
            doc_senti_probs = Lambda(lambda x: K.repeat_elements(
                x, overall_maxlen, axis=1))(doc_senti_probs)

            ### DD ###
            if args.doc_domain_layers > 0:
                doc_domain_output = doc_domain_cnn(doc_domain_output)
            domain_att_weights_softmax, domain_att_weights_sigmoid = doc_domain_att(
                doc_domain_output)
            domain_weights = Lambda(lambda x: K.expand_dims(x, axis=-1))(
                domain_att_weights_sigmoid)
            #code.interact(local=locals())
            doc_domain_output1 = WeightedSum()(
                [doc_domain_output, domain_att_weights_softmax])
            doc_domain_output1 = Dropout(args.dropout_prob)(doc_domain_output1)
            doc_domain_probs = doc_domain_dense(doc_domain_output1)

        if args.use_bert:
            aspect_output = Concatenate()([aspect_output, bert_inp])
            opinion_output = Concatenate()([opinion_output, bert_inp])
            sentiment_output = Concatenate()([sentiment_output, bert_inp])
            aspect_output = Dropout(args.dropout_prob)(aspect_output)
            opinion_output = Dropout(args.dropout_prob)(opinion_output)
            sentiment_output = Dropout(args.dropout_prob)(sentiment_output)

        ### AE ###
        if args.aspect_layers > 0:
            aspect_output = aspect_cnn(aspect_output)
        # concatenate word embeddings and task-specific output for prediction
        ### OE ###
        if args.opinion_layers > 0:
            opinion_output = opinion_cnn(opinion_output)
        ### AS ###
        if args.senti_layers > 0:
            sentiment_output = sentiment_cnn(sentiment_output)

        opin2asp = asp_caps([aspect_output, opinion_output])
        senti2asp = asp_caps([aspect_output, sentiment_output])
        asp = Concatenate()([opin2asp, senti2asp])

        asp2opin = opin_caps([opinion_output, aspect_output])
        senti2opin = opin_caps([opinion_output, sentiment_output])
        opin = Concatenate()([asp2opin, senti2opin])

        asp2senti = senti_caps([sentiment_output, aspect_output])
        opin2senti = senti_caps([sentiment_output, opinion_output])
        senti = Concatenate()([asp2senti, opin2senti])
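        # each task representation is refined by two capsule calls conditioned on the other
        # two tasks; the two outputs are concatenated (e.g. asp = [opin2asp ; senti2asp])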
        #sentiment_output = sentiment_att([sentiment_output, op_label_input, opinion_probs, p_gold_op])

        #        aspect_output += asp
        #        opinion_output += opin
        #        sentiment_output += senti
        if args.use_doc:
            aspect_output = Concatenate()(
                [word_embeddings, aspect_output, asp, domain_weights])
            opinion_output = Concatenate()(
                [word_embeddings, opinion_output, opin, domain_weights])
            sentiment_output = Concatenate()([
                init_shared_features, sentiment_output, senti, doc_senti_probs,
                senti_weights
            ])
        else:
            aspect_output = Concatenate()(
                [word_embeddings, aspect_output, asp])
            opinion_output = Concatenate()(
                [word_embeddings, opinion_output, opin])
            sentiment_output = Concatenate()(
                [init_shared_features, sentiment_output, senti])
        #aspect_output = Concatenate()([init_shared_features, aspect_output])
        aspect_output = Dropout(args.dropout_prob)(aspect_output)
        aspect_probs = aspect_dense(aspect_output)

        #opinion_output = Concatenate()([init_shared_features, opinion_output])
        opinion_output = Dropout(args.dropout_prob)(opinion_output)
        opinion_probs = opinion_dense(opinion_output)

        #sentiment_output = Concatenate()([word_embeddings, sentiment_output])
        sentiment_output = Dropout(args.dropout_prob)(sentiment_output)
        sentiment_probs = sentiment_dense(sentiment_output)

        # update the task-specific outputs (and the shared sentence representation) for the next iteration

        opinion_output = Concatenate()([
            opinion_output, aspect_probs, opinion_probs, sentiment_probs,
            domain_weights
        ])
        aspect_output = Concatenate()([
            aspect_output, aspect_probs, opinion_probs, sentiment_probs,
            domain_weights
        ])
        sentiment_output = Concatenate()([
            sentiment_output, aspect_probs, opinion_probs, sentiment_probs,
            doc_senti_probs, senti_weights
        ])
        sentence_output_ = Concatenate()([
            sentence_output, aspect_probs, opinion_probs, sentiment_probs,
            doc_senti_probs, senti_weights, domain_weights
        ])
        #code.interact(local=locals())
        aspect_output = enc_a(aspect_output)
        opinion_output = enc_o(opinion_output)
        sentiment_output = enc_s(sentiment_output)
        if args.use_doc:
            doc_senti_output = enc_d(sentence_output_)
            doc_domain_output = enc_d(sentence_output_)
        if args.use_bert:
            aspect_model = Model(
                inputs=[
                    sentence_input, A_in, op_label_input, p_gold_op, bert_input
                ],
                outputs=[aspect_probs, opinion_probs, sentiment_probs])
        else:
            aspect_model = Model(
                inputs=[sentence_input, A_in, op_label_input, p_gold_op],
                outputs=[aspect_probs, opinion_probs, sentiment_probs])

    ####################################################
    # doc-level operations without message passing
    ####################################################

    if args.use_doc:
        if args.doc_senti_layers > 0:
            doc_output_1 = doc_senti_cnn(doc_output_1)
        att_1, _ = doc_senti_att(doc_output_1)
        doc_output_1 = WeightedSum()([doc_output_1, att_1])
        doc_output_1 = Dropout(args.dropout_prob)(doc_output_1)
        doc_output_1 = doc_senti_dense(doc_output_1)
        doc_prob_1 = softmax(doc_output_1)

        if args.doc_domain_layers > 0:
            doc_output_2 = doc_domain_cnn(doc_output_2)
        att_2, _ = doc_domain_att(doc_output_2)
        doc_output_2 = WeightedSum()([doc_output_2, att_2])
        doc_output_2 = Dropout(args.dropout_prob)(doc_output_2)
        doc_prob_2 = doc_domain_dense(doc_output_2)

        doc_model = Model(inputs=[doc_input_1, doc_input_2],
                          outputs=[doc_prob_1, doc_prob_2])

    else:
        doc_model = None

    ####################################################
    # initialize word embeddings
    ####################################################

    logger.info('Initializing lookup table')

    # Load pre-trained word vectors.
    # To save loading time, we load from extracted subsets of the original embeddings,
    # which only contain the embeddings of words in the vocab.
    if args.use_doc:
        emb_path_gen = '../glove/%s_.txt' % (args.domain)
        emb_path_domain = '../domain_specific_emb/%s_.txt' % (args.domain)
    else:
        emb_path_gen = '../glove/%s.txt' % (args.domain)
        emb_path_domain = '../domain_specific_emb/%s.txt' % (args.domain)

    # Load pre-trained word vectors from the original large files.
    # When loading from an SSD, the process only takes 1-2 minutes.
    # When loading from an HDD, the first run can take a few hours, but subsequent runs
    # take 1-2 minutes thanks to the OS file cache.

    # emb_path_gen = '../glove.840B.300d.txt'
    # if args.domain == 'lt':
    #     emb_path_domain = '../laptop_emb.vec'
    # else:
    #     emb_path_domain = '../restaurant_emb.vec'

    aspect_model.get_layer('word_emb').set_weights(
        init_emb(
            aspect_model.get_layer('word_emb').get_weights(), vocab,
            emb_path_gen, emb_path_domain))

    logger.info('  Done')

    return aspect_model, doc_model
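
init_emb above expects plain-text embedding files in GloVe/word2vec style: one word per line
followed by its vector values (300 for the general file, 100 for the domain-specific file).
A minimal, self-contained sketch of that loading logic with a toy vocabulary and a hypothetical
file name (none of these names come from the original code):

import numpy as np

vocab = {'good': 0, 'battery': 1}                 # toy vocabulary
emb_matrix = np.zeros((len(vocab), 400))          # 300 general dims + 100 domain dims

# write a toy general-embedding file in the expected "word v1 v2 ... v300" format
with open('toy_general_emb.txt', 'w') as f:
    f.write('good ' + ' '.join(['0.1'] * 300) + '\n')

hits = 0.
for line in open('toy_general_emb.txt'):
    tokens = line.split()
    if len(tokens) != 301:                        # word + 300 values
        continue
    word, vec = tokens[0], tokens[1:]
    if word in vocab:
        emb_matrix[vocab[word]][:300] = vec       # numpy casts the strings to floats
        hits += 1
print('hit rate: %.2f%%' % (100 * hits / len(vocab)))    # 50.00%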