Exemplo n.º 1
0
    def __init__(self,
                 wordEmbedding,
                 hidden_size,
                 self_att_hidden,
                 n_hops,
                 paddingId,
                 updateEmbedding,
                 dropout=None):
        """Build the embedding + optional residual dense + self-attention encoder.

        :param wordEmbedding: pretrained-embedding wrapper exposing
            getEmbeddingSize(), getNumberOfVectors(), getPaddingIdx() and
            getEmbeddingMatrix() (numpy matrix).
        :param hidden_size: truthy -> add a residual dense layer.
            NOTE(review): the value itself is unused -- the layer is always
            embeddingSize -> embeddingSize; confirm intent.
        :param self_att_hidden: hidden size of the self-attention MLP.
        :param n_hops: number of attention hops.
        :param paddingId: token id treated as padding by the caller.
        :param updateEmbedding: whether the embedding matrix is trainable.
        :param dropout: dropout probability for the encoder output, or None.
        """
        super(Dense_Self_Attention, self).__init__()

        embeddingSize = wordEmbedding.getEmbeddingSize()
        # Embedding table initialized from the pretrained matrix.
        self.embedding = nn.Embedding(
            wordEmbedding.getNumberOfVectors(),
            embeddingSize,
            padding_idx=wordEmbedding.getPaddingIdx())
        self.embedding.weight.data.copy_(
            torch.from_numpy(wordEmbedding.getEmbeddingMatrix()))
        self.embedding.weight.requires_grad = updateEmbedding

        self.dense = nn.Linear(embeddingSize,
                               embeddingSize) if hidden_size else None

        self.self_attention = SelfAttention(embeddingSize, self_att_hidden,
                                            n_hops)
        self.paddingId = paddingId
        self.output_size = self.self_attention.getOutputSize()
        self.dropout = nn.Dropout(dropout) if dropout else None
Exemplo n.º 2
0
class Dense_Self_Attention(nn.Module):
    def __init__(self,
                 wordEmbedding,
                 hidden_size,
                 self_att_hidden,
                 n_hops,
                 paddingId,
                 updateEmbedding,
                 dropout=None):
        super(Dense_Self_Attention, self).__init__()

        embeddingSize = wordEmbedding.getEmbeddingSize()
        self.embedding = nn.Embedding(
            wordEmbedding.getNumberOfVectors(),
            embeddingSize,
            padding_idx=wordEmbedding.getPaddingIdx())
        self.embedding.weight.data.copy_(
            torch.from_numpy(wordEmbedding.getEmbeddingMatrix()))
        self.embedding.weight.requires_grad = updateEmbedding

        self.dense = nn.Linear(embeddingSize,
                               embeddingSize) if hidden_size else None

        self.self_attention = SelfAttention(embeddingSize, self_att_hidden,
                                            n_hops)
        self.paddingId = paddingId
        self.output_size = self.self_attention.getOutputSize()
        self.dropout = nn.Dropout(dropout) if dropout else None

    def forward(self, x):
        mask = (x != self.paddingId).float()

        x = self.embedding(x)

        if self.dense is not None:
            res = F.relu(self.dense(x)) * mask.unsqueeze(2)
            x = x + res

        x, att = self.self_attention(x, mask)
        x = x.view(x.size(0), self.output_size)

        if self.dropout:
            self.dropout(x)

        return x

    def getOutputSize(self):
        return self.self_attention.getOutputSize()
Exemplo n.º 3
0
def create_model():
    """Build a TF1 relation-scoring graph over token-id sequences.

    Reads module-level globals: `vec` (pretrained embedding matrix) and
    `n_type` (last dimension of the label score matrix).

    :return: (token_ids, score_matrix, max_seq_len, pred_score, loss,
        train_op) for use with a tf.Session.
    """
    token_ids=tf.placeholder(tf.int32,shape=(None,None),name="token_ids")
    score_matrix=tf.placeholder(tf.float32,shape=(None,None,None,n_type),name="score_matrix")
    # NOTE(review): max_seq_len is returned but never used in the graph.
    max_seq_len=tf.placeholder(tf.int32,name="max_seq_len")

    # 1.0 where token id > 0 (real token), 0.0 for padding.
    mask=tf.cast(tf.greater(tf.expand_dims(token_ids,2),0),dtype=tf.float32)
    embedding=tf.get_variable(name="embedding",initializer=tf.cast(vec,tf.float32))
    # embedding=tf.get_variable(name="embedding",dtype=tf.float32,shape=[char_size,100],initializer=tf.random_normal_initializer)
    inp_embed=tf.nn.embedding_lookup(embedding,token_ids)
    inp=inp_embed*mask

    inp=Bidirectional(LSTM(units=256,return_sequences=True))(inp)  # use CuDNNLSTM on GPU for speed; plain LSTM on CPU
    inp=Bidirectional(LSTM(units=256,return_sequences=True))(inp)

    # Self-attention followed by "batch norm" with fixed mean/variance
    # constants -- effectively a static affine rescale, not learned BN.
    # NOTE(review): confirm intent.
    self_attention=SelfAttention(d_model=512)
    lstm_output=self_attention(token_ids,inp)
    lstm_output=tf.nn.batch_normalization(lstm_output,mean=0.0,variance=3.0,offset=None,scale=1.0,variance_epsilon=0.01,name="BN1")

    # lstm_output=inp
    # Two 1-D conv branches combined pairwise by broadcasting().
    left=Conv1D(filters=64,kernel_size=3,activation="relu",padding="same")(lstm_output)
    right=Conv1D(filters=64,kernel_size=3,activation="relu",padding="same")(lstm_output)
    out=broadcasting(left,right)
    out = tf.nn.batch_normalization(out, mean=0.0, variance=3.0, offset=None, scale=1.0, variance_epsilon=0.01,name="BN2")

    # NOTE(review): pred_score already applies softmax, yet it is fed to
    # softmax_cross_entropy_with_logits_v2 as *logits* (double softmax);
    # also units=50 must equal n_type to match score_matrix -- confirm.
    pred_score=Dense(units=50,activation="softmax")(out)

    loss=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred_score,labels=score_matrix))

    optimizer=tf.train.AdamOptimizer(learning_rate=0.001)
    train_op=optimizer.minimize(loss)
    return token_ids,score_matrix,max_seq_len,pred_score,loss,train_op
Exemplo n.º 4
0
    def __call__(self, x):
        """Apply a 4-stage CNN with optional self-attention and global
        average pooling; returns ASC_CLASS softmax probabilities.

        :param x: 4-D image-like tensor (channels_last).
        :return: tensor of per-class probabilities.
        """
        # Four conv stages with increasing width: 64, 128, 192, 256 filters.
        for depth in range(4):
            filters=64*(depth+1)
            # add 2 convolutional layers per stage
            for cv in range(2):
                x=Conv2D(filters=filters,
                        kernel_size=3, padding='same',
                        data_format='channels_last',
                        kernel_initializer='glorot_uniform')(x)
                x=BatchNormalization(axis=-1)(x)
                x=Activation('relu')(x)
            x=MaxPooling2D(pool_size=(2,2), data_format='channels_last')(x)
            x=Dropout(rate=self.dropout)(x)

        if self.attention:  # idiom fix: was `== True`
            # NOTE(review): SelfAttention is invoked directly on the tensor;
            # confirm its signature -- a typical Keras layer would be
            # SelfAttention(output_filters=256)(x).
            x=SelfAttention(x, output_filters=256)

        # global average pooling over the spatial axes
        # (comment fix: the original said "max" but the layer averages)
        x=GlobalAveragePooling2D(data_format='channels_last')(x)

        # classification head
        output=Dense(units=ASC_CLASS, kernel_initializer='uniform', activation='softmax')(x)

        return output
Exemplo n.º 5
0
    def __init__(self, interface):
        """Policy/value network over unit embeddings with LSTM memory.

        Builds the input placeholders, a self-attention layer and an LSTM
        cell, then wires both the single-step (inference) and unrolled
        (training) heads of the graph.

        :param interface: environment interface providing
            num_unit_selection_actions and unit_embedding_size.
        """
        super().__init__(interface)
        self.num_select_actions = self.interface.num_unit_selection_actions

        self.rnn_size = 256
        self.self_attention = SelfAttention(hidden_size=128, num_heads=2, attention_dropout=0, train=True)
        self.lstm = tf.contrib.rnn.LSTMCell(self.rnn_size)

        # [2, batch, rnn_size]: presumably LSTM (cell, hidden) state pair -- confirm
        self.memory_input = tf.placeholder(tf.float32, [2, None, self.rnn_size], name="memory_input")
        self.unit_embeddings_input = tf.placeholder(tf.float32, [None, None, self.interface.unit_embedding_size],
                                                    name="unit_embeddings_input")
        self.unit_selection_input = tf.placeholder(tf.int32, [None], name="unit_selection_input")

        # TODO: Add in previous action index as an input
        self.prev_action_input = tf.placeholder(tf.int32, [None], name='prev_action_input')

        # NOTE(review): this rebinds the method name `features` to its own
        # result, so features() can only ever be computed once per instance.
        self.features = self.features()  # Shape [batch_size, num_features]

        lstm_output, self.next_lstm_state = self._lstm_step()
        self.train_output = self._lstm_step_train()
        self.all_values = parts.value_head(self.train_output)

        self.nonspacial_probs, self.spacial_probs_x, self.spacial_probs_y = self._probs_from_features(lstm_output)
        # presumably [:-1] drops the final step so training probs align with
        # next-step targets -- TODO confirm against the loss code
        self.nonspacial_train, self.spacial_train_x, self.spacial_train_y = \
            self._probs_from_features(self.train_output[:-1])
        self.unit_selection_probs = self._selection_probs_from_features(lstm_output, self.unit_embeddings_input)
        self._f1 = lstm_output
        self.unit_selection_probs_train = self._selection_probs_from_features(self.train_output[:-1],
                                                                              self.unit_embeddings_input[:-1])
Exemplo n.º 6
0
 def __init__(self, embed_size, heads, forward_expansion, dropout):
     """Decoder sub-block: self-attention + LayerNorm feeding a full
     TransformerBlock (which performs the cross-attention), with dropout.

     :param embed_size: model embedding dimension.
     :param heads: number of attention heads.
     :param forward_expansion: FFN width multiplier inside TransformerBlock.
     :param dropout: dropout probability.
     """
     super().__init__()
     self.attention = SelfAttention(embed_size, heads)
     self.norm = nn.LayerNorm(embed_size)
     self.transformer_block = TransformerBlock(
         embed_size, heads, dropout, forward_expansion
     )
     self.dropout = nn.Dropout(dropout)
Exemplo n.º 7
0
def build_baseline0_newatt(dataset, num_hid):
    """Assemble the baseline VQA model with the "new attention" mechanism.

    When dataset.bert is set the question side uses 768-d BERT features;
    otherwise a 300-d trainable word embedding feeds a recurrent question
    encoder.
    """
    w_emb = WordEmbedding(dataset.dictionary.ntoken, 300, 0.0)
    q_emb = QuestionEmbedding(300, num_hid, 1, False, 0.0)

    if dataset.bert:
        q_att = SelfAttention(768, num_hid)
        q_emb = FCNet([768, 768])
        v_att = NewAttention(dataset.v_dim, 768, num_hid)
        q_net = FCNet([768, num_hid])
    else:
        q_att = SelfAttention(q_emb.num_hid, num_hid)
        v_att = NewAttention(dataset.v_dim + 2, q_emb.num_hid, num_hid)
        q_net = FCNet([q_emb.num_hid, num_hid])

    v_net = FCNet([dataset.v_dim + 2, num_hid])
    classifier = SimpleClassifier(num_hid, num_hid * 2,
                                  dataset.num_ans_candidates, 0.5)

    return BaseModel(w_emb, q_emb, q_att, v_att, q_net, v_net, classifier,
                     dataset.bert)
Exemplo n.º 8
0
def train_lstm(p_n_symbols, p_embedding_weights, p_X_train, p_y_train, p_X_test, p_y_test,vocab_dim,input_length,batch_size,n_epoch):
    """Build, train and evaluate an LSTM + self-attention 3-class classifier.

    :param p_n_symbols: vocabulary size (Embedding input_dim).
    :param p_embedding_weights: pretrained embedding matrix.
    :param p_X_train, p_y_train: training sequences and one-hot labels.
    :param p_X_test, p_y_test: held-out data (used for validation and eval).
    :param vocab_dim: embedding vector size.
    :param input_length: padded sequence length.
    :param batch_size, n_epoch: training hyper-parameters.

    Side effects: prints metrics, saves training curves ('lstm') and the
    trained model ('lstm.h5').
    """
    print ('build model...')
    model = Sequential()
    model.add(Embedding(output_dim=vocab_dim,
                        input_dim=p_n_symbols,
                        mask_zero=True,
                        weights=[p_embedding_weights],
                        input_length=input_length))
    model.add(Dropout(0.5))
    model.add(LSTM(output_dim=50))
    model.add(Dropout(0.5))
    # NOTE(review): this LSTM has return_sequences=False, so SelfAttention
    # receives a 2-D tensor -- confirm SelfAttention supports that.
    model.add(SelfAttention())
    model.add(Dense(32,activation='tanh'))
    model.add(Dense(3,activation='softmax'))

    print ('processing...')
    # tune the learning rate
    adam=keras.optimizers.Adam(lr=0.00003)
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy', precision, recall, f1_score])

    print ("training...")
    history=model.fit(p_X_train, p_y_train, batch_size=batch_size, nb_epoch=n_epoch,
              validation_data=(p_X_test, p_y_test))

    # plot the training history with matplotlib
    print ("counting...")
    score, acc,prec,re,f1 = model.evaluate(p_X_test, p_y_test, batch_size=batch_size)
    print ('Test score:', score)
    print ('Test accuracy:', acc)
    print ('Test precision:', prec)
    print ('Test recall:', re)
    print ('Test f1-score:', f1)
    plt.subplot(211)
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='best')

    plt.subplot(212)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='best')
    # BUG FIX: save before show -- plt.show() clears the current figure,
    # so the original call order saved an empty image.
    plt.savefig('lstm')
    plt.show()
    # save the model
    model.save('lstm.h5')
    print('model is saved')
Exemplo n.º 9
0
    def __init__(self, h=8, d_model=512, d_ff=2048, drop_rate=0.1):
        """Transformer decoder layer: self-attention, source-target
        attention and a position-wise feed-forward network.

        :param h: number of attention heads.
        :param d_model: model (embedding) dimension.
        :param d_ff: hidden size of the feed-forward network.
        :param drop_rate: dropout probability inside the attention layers.
        """
        super(DecoderLayer, self).__init__()

        # Self Attention Layer
        # query key and value come from previous layer.
        self.self_attn = SelfAttention(h, d_model, drop_rate)
        # Source Target Attention Layer
        # NOTE(review): original comment says query comes from the encoded
        # space and key/value from the previous self-attention layer; the
        # standard transformer is the reverse (query from decoder, K/V from
        # encoder) -- confirm against SourceTargetAttention.
        self.st_attn = SourceTargetAttention(h, d_model, drop_rate)
        self.ff = FFN(d_model, d_ff)
Exemplo n.º 10
0
def aggregate_feature(model_part, d_model, seq_len):
    """Return (attention_module, output_size) for the chosen aggregation.

    "Attention" builds a SelfAttention over d_model features and keeps the
    feature size; "Flatten" uses no attention module and flattens the whole
    sequence instead.

    :raises ValueError: for any other model_part value.
    """
    if model_part == "Attention":
        return SelfAttention(d_model, d_model), d_model
    if model_part == "Flatten":
        return None, d_model * seq_len
    raise ValueError("Unsupported model choice: "+model_part+ ". Please use \"Attention\" or \"Flatten\".")
Exemplo n.º 11
0
 def __init__(self, embed_size, heads, dropout, forward_expansion):
     """Transformer encoder block: multi-head self-attention plus a
     position-wise feed-forward network, each with its own LayerNorm,
     and shared dropout.

     :param embed_size: model embedding dimension.
     :param heads: number of attention heads.
     :param dropout: dropout probability.
     :param forward_expansion: width multiplier for the FFN hidden layer.
     """
     super().__init__()
     self.attention = SelfAttention(embed_size, heads)
     self.norm1 = nn.LayerNorm(embed_size)
     self.norm2 = nn.LayerNorm(embed_size)
     # expand -> ReLU -> project back to embed_size
     self.feed_forward = nn.Sequential(
         nn.Linear(embed_size, forward_expansion * embed_size),
         nn.ReLU(),
         nn.Linear(forward_expansion * embed_size, embed_size),
     )
     self.dropout = nn.Dropout(dropout)
Exemplo n.º 12
0
 def __init__(self, config, lossfct=None, CEL_type='mean' ,quick_return=False):
     """RoBERTa sequence classifier with a self-attention pooling head.

     :param config: model config; num_labels, hidden_size and
         hidden_dropout_prob are read here.
     :param lossfct: optional loss-function override (None -> default).
     :param CEL_type: cross-entropy reduction mode, presumably consumed by
         forward() -- confirm there.
     :param quick_return: presumably short-circuits forward() -- confirm
         against the forward implementation.
     """
     super().__init__(config)
     self.num_labels = config.num_labels
     self.lossfct = lossfct
     self.roberta = RobertaModel(config)
     # pools the token representations into one vector per sequence
     self.attn = SelfAttention(config.hidden_size)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.classifier = nn.Linear(config.hidden_size, config.num_labels)
     self.CEL_type = CEL_type
     self.quick_return = quick_return
     self.init_weights()
Exemplo n.º 13
0
 def __init__(self,
              config,
              lossfct=None,
              CEL_type='mean',
              quick_return=False):
     """XLNet sequence classifier with a self-attention pooling head.

     :param config: model config; num_labels and hidden_size are read here.
     :param lossfct: optional loss-function override (None -> default).
     :param CEL_type: cross-entropy reduction mode, presumably consumed by
         forward() -- confirm there.
     :param quick_return: presumably short-circuits forward() -- confirm
         against the forward implementation.
     """
     super().__init__(config)
     self.num_labels = config.num_labels
     self.lossfct = lossfct
     self.transformer = XLNetModel(config)
     # pools the token representations into one vector per sequence
     self.attn = SelfAttention(config.hidden_size)
     #self.sequence_summary = SequenceSummary(config)
     #self.dropout = nn.Dropout(0.1)
     self.classifier = nn.Linear(config.hidden_size, config.num_labels)
     self.CEL_type = CEL_type
     self.quick_return = quick_return
     self.init_weights()
Exemplo n.º 14
0
    def build_generator(self):
        """Build the generator: a Dense projection and reshape, four
        stride-2 transposed-conv upsampling stages (BN + ReLU), with a
        self-attention layer after the second stage, ending in a tanh
        image output.
        """
        def upsample(n_filters):
            # 5x5 stride-2 transposed conv, bias-free since BN follows.
            return Conv2DTranspose(n_filters,
                                   5,
                                   padding="SAME",
                                   activation=None,
                                   use_bias=False,
                                   strides=2)

        net = Sequential()
        net.add(
            keras.layers.Dense(self.gf_dim * self.gf_dim * self.gfc_dim,
                               input_dim=self.latent_dim))
        net.add(Reshape((self.gf_dim, self.gf_dim, self.gfc_dim)))

        # first two upsampling stages: gfc_dim//2, gfc_dim//4 filters
        for divisor in (2, 4):
            net.add(upsample(self.gfc_dim // divisor))
            net.add(BatchNormalization(momentum=0.8))
            net.add(ReLU())

        net.add(SelfAttention())

        # last two upsampling stages: gfc_dim//8, gfc_dim//16 filters
        for divisor in (8, 16):
            net.add(upsample(self.gfc_dim // divisor))
            net.add(BatchNormalization(momentum=0.8))
            net.add(ReLU())

        net.add(
            Conv2D(self.channels,
                   3,
                   strides=1,
                   padding='SAME',
                   activation=None))
        net.add(Activation('tanh'))

        net.summary()

        noise = Input(shape=(self.latent_dim, ))
        img = net(noise)

        return Model(noise, img)
Exemplo n.º 15
0
    def build_discriminator(self):
        """Build the discriminator: four stride-2 conv downsampling stages
        (BN + LeakyReLU), a self-attention layer after the second stage,
        then flatten to a single unbounded validity score.
        """
        def downsample(n_filters):
            # 5x5 stride-2 conv, bias-free since BN follows.
            return Conv2D(n_filters,
                          5,
                          padding="SAME",
                          activation=None,
                          use_bias=False,
                          strides=2)

        net = Sequential()
        # input stage carries the image shape
        net.add(
            Conv2D(self.dfc_dim,
                   5,
                   padding="SAME",
                   activation=None,
                   use_bias=False,
                   strides=2,
                   input_shape=self.img_shape))

        # two stages before attention: dfc_dim, dfc_dim*2 filters
        for mult in (1, 2):
            net.add(downsample(self.dfc_dim * mult))
            net.add(BatchNormalization(momentum=0.8))
            net.add(LeakyReLU(alpha=self.alpha))

        net.add(SelfAttention())

        # two stages after attention: dfc_dim*4, dfc_dim*8 filters
        for mult in (4, 8):
            net.add(downsample(self.dfc_dim * mult))
            net.add(BatchNormalization(momentum=0.8))
            net.add(LeakyReLU(alpha=self.alpha))

        net.add(Flatten())
        net.add(Dense(units=1, activation=None))

        net.summary()

        img = Input(shape=self.img_shape)
        validity = net(img)

        return Model(img, validity)
Exemplo n.º 16
0
 def __init__(self,
              word_vocab_size,
              embedding_size,
              word_padding_idx,
              num_layers,
              hidden_size,
              dropout,
              bidirectional=True,
              encode_multi_key=True):
     """Encoder combining the base RNN encoder with a self-attention layer.

     :param word_vocab_size: vocabulary size of the word embedding.
     :param embedding_size: word embedding dimension.
     :param word_padding_idx: padding token index.
     :param num_layers: number of RNN layers.
     :param hidden_size: RNN hidden size (also the attention size).
     :param dropout: dropout probability in the base encoder.
     :param bidirectional: whether the underlying RNN is bidirectional.
     :param encode_multi_key: flag stored for use by the encoding logic.
     """
     # BUG FIX: the original hard-coded bidirectional=True in the super()
     # call, silently ignoring the constructor argument.
     super(FAEncoder, self).__init__(word_vocab_size,
                                     embedding_size,
                                     word_padding_idx,
                                     num_layers,
                                     hidden_size,
                                     dropout,
                                     bidirectional=bidirectional)
     self.attn = SelfAttention(hidden_size)
     self.encode_multi_key = encode_multi_key
Exemplo n.º 17
0
    def __init__(self, params):
        """Stack of params.num_hidden_layers encoder layers -- each a
        (self-attention, feed-forward) pair wrapped with pre/post
        processing -- followed by a final layer normalization.
        """
        super(EncoderStack, self).__init__()
        # One [attention, feed-forward] wrapper pair per hidden layer;
        # sublayers are created in the same order as before.
        self.layers = [
            [
                PrePostProcessingWrapper(
                    SelfAttention(params.hidden_size,
                                  params.num_heads,
                                  params.attention_dropout), params),
                PrePostProcessingWrapper(
                    FeedFowardNetwork(params.hidden_size,
                                      params.filter_size,
                                      params.relu_dropout), params),
            ]
            for _ in range(params.num_hidden_layers)
        ]

        # Final layer normalization applied to the stack's output.
        self.output_normalization = LayerNormalization(params.hidden_size)
Exemplo n.º 18
0
    def __init__(self, vocab_size, hopping_num, head_num, hidden_dim, dropout_rate, max_length, *args, **kwargs):
        """Transformer encoder: token + positional embeddings with input
        dropout, hopping_num residual (self-attention, FFN) blocks, and a
        final layer normalization.
        """
        super().__init__(*args, **kwargs)
        self.hopping_num  = hopping_num
        self.head_num     = head_num
        self.hidden_dim   = hidden_dim
        self.dropout_rate = dropout_rate

        self.token_embedding        = TokenEmbedding(vocab_size, hidden_dim)
        self.add_position_embedding = AddPositionalEncoding()
        self.input_dropout_layer    = tf.keras.layers.Dropout(dropout_rate)

        def _make_hop():
            # one hop: residual self-attention followed by a residual FFN
            attention = SelfAttention(hidden_dim, head_num, dropout_rate, name='self_attention')
            ffn = FeedForwardNetwork(hidden_dim, dropout_rate, name='ffn')
            return [
                ResidualNormalizationWrapper(attention, dropout_rate, name='self_attention_wrapper'),
                ResidualNormalizationWrapper(ffn, dropout_rate, name='ffn_wrapper'),
            ]

        # List[List[tf.keras.models.Model]]
        self.attention_block_list = [_make_hop() for _ in range(hopping_num)]
        self.output_normalization = LayerNormalization()
Exemplo n.º 19
0
    def __call__(self, x):
        """Apply a 4-stage CNN (5x5 convs) with optional self-attention and
        global average pooling; returns ASC_CLASS softmax probabilities.

        :param x: 4-D spectrogram-like tensor (channels_last).
        :return: tensor of per-class probabilities.
        """
        for depth in range(4):
            x=Conv2D(filters=64*(depth+1),  # 64, 128, 192, 256
                    kernel_size=5, padding='same',
                    data_format='channels_last',
                    kernel_initializer='glorot_uniform')(x)
            x=BatchNormalization(axis=-1)(x)
            x=Activation('relu')(x)
            # max pooling halves each spatial axis:
            # (40, 500) -> (20, 250) -> (10, 125) -> (5, 62) -> (2, 31)
            x=MaxPooling2D(pool_size=(2,2), data_format='channels_last')(x)
            x=Dropout(rate=self.dropout)(x)
        # output feature map shape: (40//16, 500//16, 256) = (2, 31, 256)
        if self.attention:  # idiom fix: was `== True`
            # NOTE(review): SelfAttention is invoked directly on the tensor;
            # confirm its signature -- a typical Keras layer would be
            # SelfAttention(output_filters=256)(x).
            x=SelfAttention(x, output_filters=256)
        # average-pool the features over the time/frequency axes
        x=GlobalAveragePooling2D(data_format='channels_last')(x)
        # (256,) features -> ASC_CLASS-way softmax
        output=Dense(units=ASC_CLASS, kernel_initializer='uniform', activation='softmax')(x)

        return output  # vector of class probabilities
Exemplo n.º 20
0
def processDescriptionParam(descOpts, bugReportDatabase, inputHandlers,
                            preprocessors, encoders, databasePath, cacheFolder,
                            logger, paddingSym):
    """Set up preprocessing and encoding for the bug-report description field.

    Builds the tokenizer, the cached DescriptionPreprocessor and the encoder
    selected by descOpts['encoder_type'] ('rnn', 'cnn', 'cnn+dense',
    'dense+self_att' or 'word_mean'), appending the results to the mutable
    inputHandlers, preprocessors and encoders lists (caller-visible side
    effects).

    :raises ArgumentError: for an unknown tokenizer or encoder_type.
    """
    # Use summary and description (concatenated) to address this problem
    logger.info("Using Description information.")
    # Loading word embedding

    lexicon, embedding = load_embedding(descOpts, paddingSym)
    logger.info("Lexicon size: %d" % (lexicon.getLen()))
    logger.info("Word Embedding size: %d" % (embedding.getEmbeddingSize()))
    paddingId = lexicon.getLexiconIndex(paddingSym)
    # Loading Filters
    filters = loadFilters(descOpts['filters'])
    # Tokenizer
    if descOpts['tokenizer'] == 'default':
        logger.info("Use default tokenizer to tokenize summary information")
        tokenizer = MultiLineTokenizer()
    elif descOpts['tokenizer'] == 'white_space':
        logger.info(
            "Use white space tokenizer to tokenize summary information")
        tokenizer = WhitespaceTokenizer()
    else:
        raise ArgumentError(
            "Tokenizer value %s is invalid. You should choose one of these: default and white_space"
            % descOpts['tokenizer'])

    # Cache key: database + embedding + lexicon + filters + tokenizer + field.
    arguments = (databasePath, descOpts['word_embedding'],
                 str(descOpts['lexicon']),
                 ' '.join(sorted([fil.__class__.__name__ for fil in filters
                                  ])), descOpts['tokenizer'], "description")

    descCache = PreprocessingCache(cacheFolder, arguments)
    descPreprocessor = DescriptionPreprocessor(lexicon, bugReportDatabase,
                                               filters, tokenizer, paddingId,
                                               descCache)
    preprocessors.append(descPreprocessor)

    if descOpts['encoder_type'] == 'rnn':
        rnnType = descOpts.get('rnn_type')
        hiddenSize = descOpts.get('hidden_size')
        bidirectional = descOpts.get('bidirectional', False)
        numLayers = descOpts.get('num_layers', 1)
        dropout = descOpts.get('dropout', 0.0)
        updateEmb = descOpts.get('update_embedding', False)
        fixedOpt = descOpts.get('fixed_opt', False)

        descRNN = SortedRNNEncoder(rnnType, embedding, hiddenSize, numLayers,
                                   bidirectional, updateEmb, dropout)

        # 'self_att' output pooling gets its own encoder wrapper
        if fixedOpt == 'self_att':
            att = SelfAttention(descRNN.getOutputSize(),
                                descOpts['self_att_hidden'],
                                descOpts['n_hops'])
            descEncoder = RNN_Self_Attention(descRNN, att, paddingId, dropout)
        else:
            descEncoder = RNNFixedOuput(descRNN, fixedOpt, dropout)

        encoders.append(descEncoder)
        inputHandlers.append(RNNInputHandler(paddingId))
    elif descOpts['encoder_type'] == 'cnn':
        windowSizes = descOpts.get('window_sizes', [3])
        nFilters = descOpts.get('nfilters', 100)
        updateEmb = descOpts.get('update_embedding', False)
        actFunc = loadActivationFunction(descOpts.get('activation', 'relu'))
        batchNorm = descOpts.get('batch_normalization', False)
        dropout = descOpts.get('dropout', 0.0)

        descEncoder = TextCNN(windowSizes, nFilters, embedding, updateEmb,
                              actFunc, batchNorm, dropout)
        encoders.append(descEncoder)
        inputHandlers.append(TextCNNInputHandler(paddingId, max(windowSizes)))
    elif descOpts['encoder_type'] == 'cnn+dense':
        windowSizes = descOpts.get('window_sizes', [3])
        nFilters = descOpts.get('nfilters', 100)
        updateEmb = descOpts.get('update_embedding', False)
        actFunc = loadActivationFunction(descOpts.get('activation', 'relu'))
        batchNorm = descOpts.get('batch_normalization', False)
        dropout = descOpts.get('dropout', 0.0)
        hiddenSizes = descOpts.get('hidden_sizes')
        hiddenAct = loadActivationClass(descOpts.get('hidden_act'))
        hiddenDropout = descOpts.get('hidden_dropout')
        batchLast = descOpts.get("bn_last_layer", False)

        cnnEnc = TextCNN(windowSizes, nFilters, embedding, updateEmb, actFunc,
                         batchNorm, dropout)
        descEncoder = MultilayerDense(cnnEnc, hiddenSizes, hiddenAct,
                                      batchNorm, batchLast, hiddenDropout)
        encoders.append(descEncoder)
        inputHandlers.append(TextCNNInputHandler(paddingId, max(windowSizes)))
    elif descOpts['encoder_type'] == 'dense+self_att':
        dropout = descOpts.get('dropout', 0.0)
        hiddenSize = descOpts.get('hidden_size')
        self_att_hidden = descOpts['self_att_hidden']
        n_hops = descOpts['n_hops']
        updateEmb = descOpts.get('update_embedding', False)

        descEncoder = Dense_Self_Attention(embedding,
                                           hiddenSize,
                                           self_att_hidden,
                                           n_hops,
                                           paddingId,
                                           updateEmb,
                                           dropout=dropout)
        encoders.append(descEncoder)
        inputHandlers.append(TextCNNInputHandler(paddingId, -1))
    elif descOpts['encoder_type'] == 'word_mean':
        standardization = descOpts.get('standardization', False)
        dropout = descOpts.get('dropout', 0.0)
        updateEmb = descOpts.get('update_embedding', False)
        # BUG FIX: the original read the 'update_embedding' key here
        # (copy-paste error), so batch normalization could never be enabled.
        batch_normalization = descOpts.get('batch_normalization', False)
        hiddenSize = descOpts.get('hidden_size')

        descEncoder = WordMean(embedding, updateEmb, hiddenSize,
                               standardization, dropout, batch_normalization)

        encoders.append(descEncoder)
        inputHandlers.append(RNNInputHandler(paddingId))
    else:
        # Message fix: the original listed only 'cnn' as a valid choice.
        raise ArgumentError(
            "Encoder type of summary and description is invalid (%s). You should choose one of these: rnn, cnn, cnn+dense, dense+self_att and word_mean"
            % descOpts['encoder_type'])
def train_Lstm_BiLstm_e(p_n_symbols, p_embedding_weights, p_X_train, p_y_train,
                        p_X_test, p_y_test, p_X_train_e, p_X_test_e, vocab_dim,
                        input_length, input_length_e, batch_size, n_epoch):
    """Train a two-branch classifier: an LSTM text branch and a BiLSTM
    emotion branch, each followed by self-attention, merged by addition
    into a 3-way softmax.

    :param p_n_symbols: vocabulary size (Embedding input_dim).
    :param p_embedding_weights: pretrained embedding matrix (shared).
    :param p_X_train, p_y_train: text training data and one-hot labels.
    :param p_X_test, p_y_test: held-out text data and labels.
    :param p_X_train_e, p_X_test_e: emotion-branch inputs.
    :param vocab_dim: embedding vector size.
    :param input_length, input_length_e: padded lengths per branch.
    :param batch_size, n_epoch: training hyper-parameters.

    Side effects: prints metrics, saves curves ('Lstm_BiLstm_e') and the
    model ('Lstm_BiLstm_e.h5').
    """

    print('build model...')
    # text branch: LSTM + self-attention
    model_t = Sequential()
    model_t.add(
        Embedding(output_dim=vocab_dim,
                  input_dim=p_n_symbols,
                  mask_zero=True,
                  weights=[p_embedding_weights],
                  input_length=input_length))

    model_t.add(LSTM(output_dim=50))
    model_t.add(Dropout(0.5))
    model_t.add(SelfAttention())

    # emotion branch: BiLSTM + self-attention
    model_e = Sequential()
    model_e.add(
        Embedding(output_dim=vocab_dim,
                  input_dim=p_n_symbols,
                  mask_zero=True,
                  weights=[p_embedding_weights],
                  input_length=input_length_e))
    model_e.add(Bidirectional(LSTM(output_dim=50)))
    model_e.add(Dropout(0.5))
    model_e.add(SelfAttention())

    # merge the text and emotion branches by element-wise addition
    mergedOut = keras.layers.Add()([model_t.output, model_e.output])
    mergedOut = Dense(3, activation='softmax')(mergedOut)
    # (removed a dead `model = Sequential()` that was immediately overwritten)
    model = Model([model_t.input, model_e.input], mergedOut)
    print('processing...')
    # tune the learning rate
    adam = keras.optimizers.Adam(lr=0.0001)
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy', precision, recall, f1_score])

    print("training...")
    history = model.fit([p_X_train, p_X_train_e],
                        p_y_train,
                        batch_size=batch_size,
                        nb_epoch=n_epoch,
                        validation_data=([p_X_test, p_X_test_e], p_y_test))
    # plot the training history with matplotlib
    print("counting...")
    # BUG FIX: the model has two inputs (text + emotion), but the original
    # evaluated with only p_X_test.
    score, acc, prec, re, f1 = model.evaluate([p_X_test, p_X_test_e],
                                              p_y_test,
                                              batch_size=batch_size)
    print('Test score:', score)
    print('Test accuracy:', acc)
    print('Test precision:', prec)
    print('Test recall:', re)
    print('Test f1-score:', f1)
    plt.subplot(211)
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='best')

    plt.subplot(212)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='best')
    # BUG FIX: save before show -- plt.show() clears the current figure,
    # so the original call order saved an empty image.
    plt.savefig('Lstm_BiLstm_e')
    plt.show()
    # save the model
    model.save('Lstm_BiLstm_e.h5')
    print('model is saved')