Example #1
 def test_get_same(self):
     model = get_model(
         token_num=13,
         embed_dim=30,
         encoder_num=3,
         decoder_num=2,
         head_num=3,
         hidden_dim=120,
         attention_activation=None,
         feed_forward_activation='relu',
         dropout_rate=0.05,
         use_same_embed=True,
         embed_weights=np.random.random((13, 30)),
         trainable=False,
     )
     model.compile(
         optimizer=keras.optimizers.Adam(),
         loss=keras.losses.categorical_crossentropy,
         metrics={},
     )
     model_path = os.path.join(
         tempfile.gettempdir(),
         'test_transformer_%f.h5' % np.random.random())
     model.save(model_path)
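     # get_custom_objects() supplies keras-transformer's custom layer classes so
     # load_model can deserialize the saved graph.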
     model = keras.models.load_model(model_path,
                                     custom_objects=get_custom_objects())
     model.summary()
     try:
         keras.utils.plot_model(model, 'transformer_same.png')
     except Exception as e:
         print(e)
     self.assertIsNotNone(model)
Example #2
def predict():
    with open('./models/target_token_dict.pkl', 'rb') as f:
        target_token_dict = pickle.load(f)
    with open('./models/source_token_dict.pkl', 'rb') as f:
        source_token_dict = pickle.load(f)

    target_token_dict_inv = {v: k for k, v in target_token_dict.items()}

    source_tokens_list = [
        t.split() for t in '''He lost.
    I try.
    I won!
    I runs.
    I came.
    He run.
    We lost.
    We runs in the park every day.
    He calmed down.
    See you about 8.
    He get you.
    She wears a wig.'''.split('\n') if t
    ]

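    # source_max_len and the '<UNKOWN>' entry are assumed to come from the training
    # script (a module-level constant and the pickled dictionaries, respectively).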
    encode_tokens = [['<START>'] + tokens + ['<END>']
                     for tokens in source_tokens_list]
    encode_tokens = [
        tokens + ['<PAD>'] * (source_max_len - len(tokens))
        for tokens in encode_tokens
    ]
    encode_input = [
        list(map(lambda x: source_token_dict.get(x, source_token_dict['<UNKOWN>']),
                 tokens))
        for tokens in encode_tokens
    ]

    model = get_model(
        token_num=max(len(source_token_dict), len(target_token_dict)),
        embed_dim=32,
        encoder_num=2,
        decoder_num=2,
        head_num=4,
        hidden_dim=128,
        dropout_rate=0.05,
        use_same_embed=False,  # Use different embeddings for different languages
    )
    model.load_weights('./models/model.h5', by_name=True, reshape=True)
    # Predict
    decoded = decode(model,
                     encode_input,
                     start_token=target_token_dict['<START>'],
                     end_token=target_token_dict['<END>'],
                     pad_token=target_token_dict['<PAD>'],
                     max_repeat=len(encode_input),
                     max_repeat_block=len(encode_input))
    for i, source in enumerate(source_tokens_list):
        predicted = ''.join(
            map(lambda x: target_token_dict_inv[x], decoded[i][1:-1]))
        print("{},预测结果:{}".format(source, predicted))
Example #3
def train(
    use_checkpoint=True,
    initial_epoch=0,
):
    if use_checkpoint:
        transformer_model = keras_transformer.get_model(
            token_num=32006,
            embed_dim=768,
            encoder_num=4,
            decoder_num=4,
            head_num=8,
            hidden_dim=256,
            dropout_rate=0.1,
        )
        transformer_model.load_weights(
            'data/checkpoint/transformer_model.ckpt')
    else:
        bert_model = keras_bert.load_trained_model_from_checkpoint(
            checkpoint_file=checkpoint_file_path, config_file=config_file_path)
        bert_weights = bert_model.get_layer(
            name='Embedding-Token').get_weights()[0]
        transformer_model = get_transformer_on_bert_model(
            token_num=32006,
            embed_dim=768,
            encoder_num=4,
            decoder_num=4,
            head_num=8,
            hidden_dim=256,
            dropout_rate=0.1,
            embed_weights=bert_weights,
        )
    transformer_model.compile(
        optimizer=keras.optimizers.Adam(beta_2=0.98),
        #        optimizer=keras.optimizers.SGD(),
        #        optimizer='adam',
        loss=keras.losses.sparse_categorical_crossentropy,
        metrics=[keras.metrics.mae, keras.metrics.sparse_categorical_accuracy],
    )
    transformer_model.summary()
    history = transformer_model.fit_generator(
        generator=_generator(),
        steps_per_epoch=100,
        epochs=200,
        validation_data=_generator(),
        validation_steps=20,
        callbacks=[
            keras.callbacks.ModelCheckpoint(
                './data/checkpoint/transformer_model.ckpt',
                monitor='val_loss'),
            keras.callbacks.TensorBoard(log_dir='./data/log-adam-4000-D32/'),
            keras.callbacks.LearningRateScheduler(_decay),
            #            keras.callbacks.EarlyStopping(monitor='val_loss', patience=0, verbose=1, mode='auto'),
            PredictionCallback(encoder_inputs[0], 20),
        ],
        initial_epoch=initial_epoch,
    )
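`_generator`, `_decay`, and `PredictionCallback` are project helpers that this snippet does not include; a minimal sketch of plausible shapes for the first two (the sizes and the warmup schedule are assumptions, guessed from the surrounding code and log directory names) might look like this:

import numpy as np

def _generator(batch_size=32, seq_len=64, token_num=32006):
    # Hypothetical data generator: endlessly yields random encoder/decoder token
    # ids plus sparse targets with a trailing singleton dimension, which is the
    # shape sparse_categorical_crossentropy expects here.
    while True:
        enc = np.random.randint(0, token_num, size=(batch_size, seq_len))
        dec = np.random.randint(0, token_num, size=(batch_size, seq_len))
        yield [enc, dec], np.expand_dims(dec, axis=-1)

def _decay(epoch):
    # Hypothetical "Noam" warmup schedule; the 4000-step warmup is only a guess
    # taken from the 'log-adam-4000-*' log directory names.
    step = max(epoch * 100, 1)  # steps_per_epoch=100 in the snippet above
    return (768 ** -0.5) * min(step ** -0.5, step * (4000 ** -1.5))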
Example #4
    def create_transformer(self, params):

        transformer = get_model(
            token_num=params['dataset'].len_encoding,
            embed_dim=params['input_embedding_size'],
            encoder_num=params['encoder_num'],
            decoder_num=params['decoder_num'],
            head_num=params["num_heads"],
            hidden_dim=params["d_model"],
            attention_activation=None,
            dropout_rate=params["dropout_rate"],
            embed_weights=None
        )

        return transformer
Example #5
def model_transformer():
    # chars = chars +  '<START>'
    m = get_model(
        token_num=len(c_table.char_indices),
        embed_dim=EMBEDDING_DIM,  # word/character embedding dim
        encoder_num=3,
        decoder_num=2,
        head_num=2,
        hidden_dim=120,
        attention_activation='relu',
        feed_forward_activation='relu',
        dropout_rate=0.05,
        embed_weights=np.random.random((len(c_table.char_indices), EMBEDDING_DIM)),
    )
    return m
Example #6
File: enc_dec.py  Project: hsha0/M-M
def create_transformer():

    model = get_model(token_num=SEQUENCE_LENGTH,
                      embed_dim=FLAGS.embedding_size,
                      encoder_num=3,
                      decoder_num=3,
                      head_num=8,
                      hidden_dim=FLAGS.num_cells,
                      attention_activation='relu',
                      feed_forward_activation='relu',
                      dropout_rate=0.05,
                      embed_weights=np.random.random(
                          (SEQUENCE_LENGTH, FLAGS.embedding_size)))

    opt = keras.optimizers.Adam(lr=FLAGS.learning_rate)
    model.compile(optimizer=opt,
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])

    return model
Example #7
    def getModel(self):
        print("Beginning to build the model.")
        model = get_model(
            token_num=max(len(self.source_token_dict),
                          len(self.target_token_dict)),
            embed_dim=HyperParameters.EMBED_DIM,
            encoder_num=HyperParameters.ENCODER_NUM,
            decoder_num=HyperParameters.DECODER_NUM,
            head_num=HyperParameters.HEAD_NUM,
            hidden_dim=HyperParameters.HIDDEN_DIM,
            dropout_rate=HyperParameters.DROPOUT_RATE,
            use_same_embed=False,
        )
        model.compile('adam',
                      'sparse_categorical_crossentropy',
                      metrics=['acc'])
        print("The model has been built successfully and the summary of it is")
        model.summary()

        return model
Example #8
def load_transformer_model(config):
    model_params = config["model"]
    source_token_dict = config["vocab"]["source_token_dict"]
    target_token_dict = config["vocab"]["target_token_dict"]
    embed_dim = model_params["embed_dim"]
    hidden_dim = model_params["hidden_dim"]
    head_num = model_params["head_num"]
    encoder_num = model_params["encoder_num"]
    decoder_num = model_params["decoder_num"]
    dropout_rate = model_params["dropout_rate"]
    use_same_embed = bool(model_params["use_same_embed"])

    model = get_model(
        token_num=max(len(source_token_dict), len(target_token_dict)),
        embed_dim=embed_dim,
        encoder_num=encoder_num,
        decoder_num=decoder_num,
        head_num=head_num,
        hidden_dim=hidden_dim,
        dropout_rate=dropout_rate,
        use_same_embed=use_same_embed,
    )
    return model
Example #9
#print(tokens_codificador[120000])

entrada_codificador = [list(map(lambda x : diccionario_entrada[x], tokens)) for tokens in tokens_codificador]
entrada_decodificador = [list(map(lambda x : diccionario_salida[x], tokens)) for tokens in tokens_decodificador]
salida_decodificador = [list(map(lambda x : diccionario_salida[x], tokens)) for tokens in tokens_resultado]

#print(entrada_codificador[120000])


# Create the transformer network
modelo = get_model(
    # number of words the model uses
    token_num=max(len(diccionario_entrada), len(diccionario_salida)),
    embed_dim=32,
    encoder_num=2,
    decoder_num=2,
    head_num=4,
    hidden_dim=128,
    dropout_rate=0.05,
    use_same_embed=False,
)
modelo.compile('adam', 'sparse_categorical_crossentropy')
#modelo.summary()
modelo.load_weights('translator_preentrenado.h5')

# TRAINING:

# Array with the two encoded inputs, Spanish and English.
x = [np.array(entrada_codificador), np.array(entrada_decodificador)]
# Output.
y = np.array(salida_decodificador)
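The snippet stops right after assembling x and y; a minimal sketch of the training step it implies (epoch count and batch size are assumptions, mirroring the commented-out call in Example #19):

# Train on the encoded sentence pairs (hyperparameters are assumptions).
modelo.fit(x, y, epochs=30, batch_size=32)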
Example #10
    decode_tokens = list(map(lambda x: token_dict[x], decode_tokens))
    output_tokens = list(map(lambda x: [token_dict[x]], output_tokens))
    encoder_inputs_no_padding.append(encode_tokens[:i + 2])
    encoder_inputs.append(encode_tokens)
    decoder_inputs.append(decode_tokens)
    decoder_outputs.append(output_tokens)

print(encoder_inputs)

# Build the model
model = get_model(
    token_num=len(token_dict),
    embed_dim=30,
    encoder_num=3,
    decoder_num=2,
    head_num=3,
    hidden_dim=120,
    attention_activation='relu',
    feed_forward_activation='relu',
    dropout_rate=0.05,
    embed_weights=np.random.random((13, 30)),
)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
)
model.summary()
plot_model(model,
           to_file='model_plot.png',
           show_shapes=True,
           show_layer_names=True)
Example #11
print(encode_test_input[0])

#print("encode input")
#print(encode_input)
#print("decode input")
#print(decode_input)
#print("decode output")
#print(decode_output)

# Build & fit model
with tf.device("/cpu:0"):
    model_cpu = get_model(
        token_num=len(token_dict),
        embed_dim=32,
        encoder_num=6,
        decoder_num=6,
        head_num=8,
        hidden_dim=128,
        dropout_rate=0.05,
        use_same_embed=True,  # Share one embedding layer between encoder and decoder
    )
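# multi_gpu_model replicates the CPU-hosted template model across 4 GPUs;
# the returned replica is the one that gets compiled and trained.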
model = multi_gpu_model(model_cpu, gpus=4)
model.compile('adam', 'sparse_categorical_crossentropy')
model.summary()

model.fit(
    x=[np.array(encode_input), np.array(decode_input)],
    y=np.array(decode_output),
    epochs=10,
    batch_size=128,
)
Example #12
def get_transformer_on_bert_model(
        token_num: int,
        embed_dim: int,
        encoder_num: int,
        decoder_num: int,
        head_num: int,
        hidden_dim: int,
        embed_weights,
        attention_activation=None,
        feed_forward_activation: str = 'relu',
        dropout_rate: float = 0.0,
        use_same_embed: bool = True,
        embed_trainable=True,
        trainable: bool = True) -> keras.engine.training.Model:
    """
    Transformerのモデルのinputsを特徴ベクトルにしたモデル.それ以外は特に変わらない.
    inputsのshapeは (None, seq_len, embed_dim) となっている,

    Parameters
    ----------
    token_num
        トークンのサイズ.(vocab_sizeと同じ)
    embed_dim
        特徴ベクトルの次元.inputsの次元数と同じにする.
    encoder_num
        エンコーダの層の数.
    decoder_num
        デコーダの層の数.
    head_num
        Multi-Head Attentionレイヤの分割ヘッド数.
    hidden_dim
        隠し層の次元数.
    embed_weights
        特徴ベクトルの初期化.
    attention_activation
        Attentionレイヤの活性化関数.
    feed_forward_activation
        FFNレイヤの活性化関数.
    dropout_rate
        Dropoutのレート.
    use_same_embed
        エンコーダとデコーダで同じweightsを使用するか.
    embed_trainable
        特徴ベクトルがトレーニング可能かどうか.
    trainable
        モデルがトレーニング可能かどうか.

    Returns
    -------
    model
        日本語学習済みのBERTの特徴ベクトルを用いたTransformerモデル
    """
    return keras_transformer.get_model(
        token_num=token_num,
        embed_dim=embed_dim,
        encoder_num=encoder_num,
        decoder_num=decoder_num,
        head_num=head_num,
        hidden_dim=hidden_dim,
        embed_weights=embed_weights,
        attention_activation=attention_activation,
        feed_forward_activation=feed_forward_activation,
        dropout_rate=dropout_rate,
        use_same_embed=use_same_embed,
        embed_trainable=embed_trainable,
        trainable=trainable)
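A short usage sketch for this wrapper, mirroring Example #3 above; the keras_bert checkpoint paths are assumptions:

bert_model = keras_bert.load_trained_model_from_checkpoint(
    checkpoint_file=checkpoint_file_path, config_file=config_file_path)
bert_weights = bert_model.get_layer(name='Embedding-Token').get_weights()[0]
model = get_transformer_on_bert_model(
    token_num=bert_weights.shape[0],   # vocabulary size of the BERT checkpoint
    embed_dim=bert_weights.shape[1],   # 768 for the BERT base models
    encoder_num=4,
    decoder_num=4,
    head_num=8,
    hidden_dim=256,
    dropout_rate=0.1,
    embed_weights=bert_weights,
)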
Example #13
    def test_translate(self):
        source_tokens = [
            'i need more power'.split(' '),
            'eat jujube and pill'.split(' '),
        ]
        target_tokens = [
            list('我要更多的抛瓦'),
            list('吃枣💊'),
        ]

        # Generate dictionaries
        source_token_dict = self._build_token_dict(source_tokens)
        target_token_dict = self._build_token_dict(target_tokens)
        target_token_dict_inv = {v: k for k, v in target_token_dict.items()}

        # Add special tokens
        encode_tokens = [['<START>'] + tokens + ['<END>']
                         for tokens in source_tokens]
        decode_tokens = [['<START>'] + tokens + ['<END>']
                         for tokens in target_tokens]
        output_tokens = [
            tokens + ['<END>', '<PAD>'] for tokens in target_tokens
        ]

        # Padding
        source_max_len = max(map(len, encode_tokens))
        target_max_len = max(map(len, decode_tokens))

        encode_tokens = [
            tokens + ['<PAD>'] * (source_max_len - len(tokens))
            for tokens in encode_tokens
        ]
        decode_tokens = [
            tokens + ['<PAD>'] * (target_max_len - len(tokens))
            for tokens in decode_tokens
        ]
        output_tokens = [
            tokens + ['<PAD>'] * (target_max_len - len(tokens))
            for tokens in output_tokens
        ]

        encode_input = [
            list(map(lambda x: source_token_dict[x], tokens))
            for tokens in encode_tokens
        ]
        decode_input = [
            list(map(lambda x: target_token_dict[x], tokens))
            for tokens in decode_tokens
        ]
        decode_output = [
            list(map(lambda x: [target_token_dict[x]], tokens))
            for tokens in output_tokens
        ]

        # Build & fit model
        model = get_model(
            token_num=max(len(source_token_dict), len(target_token_dict)),
            embed_dim=32,
            encoder_num=2,
            decoder_num=2,
            head_num=4,
            hidden_dim=128,
            dropout_rate=0.05,
            use_same_embed=False,  # Use different embeddings for different languages
        )
        model.compile('adam', 'sparse_categorical_crossentropy')
        model.summary()
        model.fit(
            x=[np.array(encode_input * 1024),
               np.array(decode_input * 1024)],
            y=np.array(decode_output * 1024),
            epochs=10,
            batch_size=32,
        )

        # Predict
        decoded = decode(
            model,
            encode_input,
            start_token=target_token_dict['<START>'],
            end_token=target_token_dict['<END>'],
            pad_token=target_token_dict['<PAD>'],
        )
        for i in range(len(encode_input)):
            predicted = ''.join(
                map(lambda x: target_token_dict_inv[x], decoded[i][1:-1]))
            self.assertEqual(''.join(target_tokens[i]), predicted)
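`self._build_token_dict` is not included in the snippet; a plausible implementation, modeled on the dictionary-building helper in the keras-transformer README:

    def _build_token_dict(self, token_list):
        # Map every token to an integer id, reserving ids for the special tokens.
        token_dict = {
            '<PAD>': 0,
            '<START>': 1,
            '<END>': 2,
        }
        for tokens in token_list:
            for token in tokens:
                if token not in token_dict:
                    token_dict[token] = len(token_dict)
        return token_dict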
Example #14
def main(lang, input_file, output_file):
    exclude = set(string.punctuation + string.digits)

    input_token_index = config_lang_tsf[lang.lower()]['input_token_index']
    target_token_index = config_lang_tsf[lang.lower()]['target_token_index']
    max_encoder_seq_length = config_lang_tsf[
        lang.lower()]['max_encoder_seq_length']
    params = config_lang_tsf[lang.lower()]['params']
    target_max_len = 50
    token_num = max(len(target_token_index), len(input_token_index))

    model = get_model(token_num=token_num,
                      embed_dim=params['embed_dim'],
                      encoder_num=params['encoder_num'],
                      decoder_num=params['decoder_num'],
                      head_num=params['head_num'],
                      hidden_dim=params['hidden_dim'],
                      dropout_rate=params['dropout_rate'],
                      use_same_embed=False,
                      embed_weights=np.random.random(
                          (token_num, params['embed_dim'])))

    model_path = ('models_transformer/' + lang.lower() +
                  '_clean_28042020.csv_transformer.keras')
    model.load_weights(model_path)

    input_texts = []
    with open(input_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()[:]

    for line in lines:
        for wd in line.strip().split():
            if wd not in input_texts:
                if all([
                        ch in input_token_index for ch in wd.lower()
                        if ch not in exclude
                ]):
                    s = ''.join(ch for ch in wd.lower() if ch not in exclude)
                    if len(s):
                        input_texts.append([x for x in s.lower().strip()])

    reverse_input_char_index = dict(
        (i, char) for char, i in input_token_index.items())
    reverse_target_char_index = dict(
        (i, char) for char, i in target_token_index.items())

    test_encode_tokens = [['<START>'] + tokens + ['<END>']
                          for tokens in input_texts]
    test_encode_tokens = [
        tokens + ['<PAD>'] * (50 - len(tokens))
        for tokens in test_encode_tokens
    ]
    test_input = [
        list(map(lambda x: input_token_index[x], tokens))
        for tokens in test_encode_tokens
    ]

    print("predicting ...")
    decoded = {}
    for i in range(len(test_input)):
        int_decoded = []
        prediction = decode(model,
                            test_input[i],
                            start_token=target_token_index['<START>'],
                            end_token=target_token_index['<END>'],
                            pad_token=target_token_index['<PAD>'],
                            max_len=token_num + 2 + 5)

        wd = ''.join(input_texts[i])
        for j in range(1, len(prediction)):
            if reverse_target_char_index[prediction[j]] in [
                    '<PAD>', '<END>', '<START>'
            ]:
                break
            else:
                int_decoded.append(prediction[j])
        decoded[wd] = ' '.join(
            map(lambda x: reverse_target_char_index[x], int_decoded))

    print(decoded)
    with open(output_file, 'w') as fout:
        for i in range(len(lines)):
            fout.write("%s|" % lines[i].strip())
            for wd in lines[i].strip().lower().split():
                wd_strip = ''.join(ch for ch in wd.lower()
                                   if ch not in exclude)
                if wd_strip in decoded:
                    fout.write("[%s] " % decoded[wd_strip])
                else:
                    fout.write("[UNK] ")
            fout.write("\n")

    print('\n' + "*" * 20)
    print("DONE! Wrote %d lines to %s..." % (len(lines), output_file))
    print("*" * 20 + '\n')
Example #15
    decode_input = [
        list(map(lambda x: target_token_dict[x], tokens))
        for tokens in decode_tokens
    ]
    decode_output = [
        list(map(lambda x: [target_token_dict[x]], tokens))
        for tokens in output_tokens
    ]

    if EPOCHS > 0:
        # Build & fit model
        model = get_model(
            token_num=max(len(source_token_dict), len(target_token_dict)),
            embed_dim=EMBED_DIM,
            encoder_num=LAYERS,
            decoder_num=LAYERS,
            head_num=ATTN_HEADS,
            hidden_dim=HIDDEN_DIM,
            dropout_rate=DROPOUT,
            use_same_embed=False,  # Use different embeddings for different languages
        )

        model.compile('adam', 'sparse_categorical_crossentropy')
        model.summary()

        model.fit(x=[
            np.array(encode_input * DATA_MULTIPLIER),
            np.array(decode_input * DATA_MULTIPLIER)
        ],
                  y=np.array(decode_output * DATA_MULTIPLIER),
                  epochs=EPOCHS,
Example #16
encode_input = [
    list(map(lambda x: source_token_dict[x], tokens))
    for tokens in encode_tokens
]
decode_input = [
    list(map(lambda x: target_token_dict[x], tokens))
    for tokens in decode_tokens
]
decode_output = [
    list(map(lambda x: [target_token_dict[x]], tokens))
    for tokens in output_tokens
]

# Build & fit model
model = get_model(
    token_num=max(len(source_token_dict), len(target_token_dict)),
    embed_dim=32,
    encoder_num=2,
    decoder_num=2,
    head_num=4,
    hidden_dim=128,
    dropout_rate=0.05,
    use_same_embed=False,  # Use different embeddings for different languages
)
model.compile('adam', 'sparse_categorical_crossentropy')
model.summary()

print(encode_input[0], " : ", decode_input[0], " : ", decode_output[0])
model.fit(
    x=[np.array(encode_input * 1024),
       np.array(decode_input * 1024)],
    y=np.array(decode_output * 1024),
    epochs=5,
    batch_size=32,
)
Example #17
    def train(self, train_file='/home/gswyhq/data/cmn-eng/cmn.txt'):
        source_tokens = [
            'i need more power'.split(' '),
            'eat jujube and pill'.split(' '),
        ]
        target_tokens = [
            list('我要更多的抛瓦'),
            list('吃枣💊'),
        ]

        with open(train_file) as f:
            for data in f.readlines():
                if '\t' in data:
                    source, target = data.strip().split('\t', maxsplit=1)
                    source_tokens.append(source.split(' '))
                    target_tokens.append(list(target))

        # Generate dictionaries
        source_token_dict = self._build_token_dict(source_tokens)
        target_token_dict = self._build_token_dict(target_tokens)
        target_token_dict_inv = {v: k for k, v in target_token_dict.items()}

        # Add special tokens
        encode_tokens = [['<START>'] + tokens + ['<END>']
                         for tokens in source_tokens]
        decode_tokens = [['<START>'] + tokens + ['<END>']
                         for tokens in target_tokens]
        output_tokens = [
            tokens + ['<END>', '<PAD>'] for tokens in target_tokens
        ]
        # print('output_tokens: {}'.format(output_tokens))
        # Padding
        source_max_len = max(map(len, encode_tokens))
        target_max_len = max(map(len, decode_tokens))

        print('source_max_len: {}; target_max_len: {}'.format(
            source_max_len,
            target_max_len))  # source_max_len: 34; target_max_len: 46
        print("len(source_token_dict): {}, len(target_token_dict): {}".format(
            len(source_token_dict), len(target_token_dict))
              )  # len(source_token_dict): 10814, len(target_token_dict): 3442

        with open('./models/target_token_dict.pkl', 'wb') as f:
            pickle.dump(target_token_dict, f)

        with open('./models/source_token_dict.pkl', 'wb') as f:
            pickle.dump(source_token_dict, f)

        encode_tokens = [
            tokens + ['<PAD>'] * (source_max_len - len(tokens))
            for tokens in encode_tokens
        ]
        decode_tokens = [
            tokens + ['<PAD>'] * (target_max_len - len(tokens))
            for tokens in decode_tokens
        ]
        output_tokens = [
            tokens + ['<PAD>'] * (target_max_len - len(tokens))
            for tokens in output_tokens
        ]
        # print('output_tokens: {}'.format(output_tokens))
        encode_input = [
            list(map(lambda x: source_token_dict[x], tokens))
            for tokens in encode_tokens
        ]
        decode_input = [
            list(map(lambda x: target_token_dict[x], tokens))
            for tokens in decode_tokens
        ]
        decode_output = [
            list(map(lambda x: [target_token_dict[x]], tokens))
            for tokens in output_tokens
        ]
        # print("decode_output: {}".format(decode_output))
        # Build & fit model
        model = get_model(
            token_num=max(len(source_token_dict), len(target_token_dict)),
            embed_dim=32,
            encoder_num=2,
            decoder_num=2,
            head_num=4,
            hidden_dim=128,
            dropout_rate=0.05,
            use_same_embed=False,  # Use different embeddings for different languages
        )
        model.compile('adam', 'sparse_categorical_crossentropy')
        model.summary()

        early_stopping = EarlyStopping(monitor='loss', patience=3)

        model_checkpoint = ModelCheckpoint(filepath=os.path.join(
            './models', 'translate-{epoch:02d}-{loss:.4f}.hdf5'),
                                           save_best_only=False,
                                           save_weights_only=False)

        model.fit(x=[np.array(encode_input * 1),
                     np.array(decode_input * 1)],
                  y=np.array(decode_output * 1),
                  epochs=10,
                  batch_size=32,
                  callbacks=[early_stopping, model_checkpoint])

        model.save('./models/model.h5')

        # Predict
        encode_input = encode_input[:30]
        decoded = decode(model,
                         encode_input,
                         start_token=target_token_dict['<START>'],
                         end_token=target_token_dict['<END>'],
                         pad_token=target_token_dict['<PAD>'],
                         max_repeat=len(encode_input),
                         max_repeat_block=len(encode_input))

        right_count = 0
        error_count = 0

        for i in range(len(encode_input)):
            predicted = ''.join(
                map(lambda x: target_token_dict_inv[x], decoded[i][1:-1]))
            print("原始结果:{},预测结果:{}".format(''.join(target_tokens[i]),
                                           predicted))

            if ''.join(target_tokens[i]) == predicted:
                right_count += 1
            else:
                error_count += 1

        print("正确: {}, 错误:{}, 正确率: {}".format(
            right_count, error_count,
            right_count / (right_count + error_count + 0.001)))
Example #18
    def test_decode(self):
        tokens = 'all work and no play makes jack a dull boy'.split(' ')
        token_dict = {
            '<PAD>': 0,
            '<START>': 1,
            '<END>': 2,
        }
        for token in tokens:
            if token not in token_dict:
                token_dict[token] = len(token_dict)
        model = get_model(
            token_num=len(token_dict),
            embed_dim=32,
            encoder_num=3,
            decoder_num=2,
            head_num=4,
            hidden_dim=128,
            dropout_rate=0.05,
        )
        model.compile(
            optimizer='adam',
            loss='sparse_categorical_crossentropy',
        )
        model.summary()
        encoder_inputs_no_padding = []
        encoder_inputs, decoder_inputs, decoder_outputs = [], [], []
        for i in range(1, len(tokens)):
            encode_tokens, decode_tokens = tokens[:i], tokens[i:]
            encode_tokens = (['<START>'] + encode_tokens + ['<END>'] +
                             ['<PAD>'] * (len(tokens) - len(encode_tokens)))
            output_tokens = (decode_tokens + ['<END>', '<PAD>'] +
                             ['<PAD>'] * (len(tokens) - len(decode_tokens)))
            decode_tokens = (['<START>'] + decode_tokens + ['<END>'] +
                             ['<PAD>'] * (len(tokens) - len(decode_tokens)))
            encode_tokens = list(map(lambda x: token_dict[x], encode_tokens))
            decode_tokens = list(map(lambda x: token_dict[x], decode_tokens))
            output_tokens = list(map(lambda x: [token_dict[x]], output_tokens))
            encoder_inputs_no_padding.append(encode_tokens[:i + 2])
            encoder_inputs.append(encode_tokens)
            decoder_inputs.append(decode_tokens)
            decoder_outputs.append(output_tokens)
        current_path = os.path.dirname(os.path.abspath(__file__))
        model_path = os.path.join(current_path, 'test_transformer.h5')
        if os.path.exists(model_path):
            model.load_weights(model_path, by_name=True)
        else:
            model.fit(
                x=[
                    np.asarray(encoder_inputs * 2048),
                    np.asarray(decoder_inputs * 2048)
                ],
                y=np.asarray(decoder_outputs * 2048),
                epochs=10,
                batch_size=128,
            )
            model.save(model_path)
        model = keras.models.load_model(model_path,
                                        custom_objects=get_custom_objects())
        decoded = decode(
            model,
            encoder_inputs_no_padding * 2,
            start_token=token_dict['<START>'],
            end_token=token_dict['<END>'],
            pad_token=token_dict['<PAD>'],
        )
        token_dict_rev = {v: k for k, v in token_dict.items()}
        for i in range(len(decoded)):
            print(' '.join(map(lambda x: token_dict_rev[x], decoded[i][1:-1])))
        for i in range(len(decoded)):
            for j in range(len(decoded[i])):
                self.assertEqual(decoder_inputs[i % len(decoder_inputs)][j],
                                 decoded[i][j])

        decoded = decode(
            model,
            encoder_inputs_no_padding[2] + [0] * 5,
            start_token=token_dict['<START>'],
            end_token=token_dict['<END>'],
            pad_token=token_dict['<PAD>'],
        )
        for j in range(len(decoded)):
            self.assertEqual(decoder_inputs[2][j], decoded[j], decoded)

        decoded = decode(
            model,
            encoder_inputs_no_padding,
            start_token=token_dict['<START>'],
            end_token=token_dict['<END>'],
            pad_token=token_dict['<PAD>'],
            max_len=4,
        )
        token_dict_rev = {v: k for k, v in token_dict.items()}
        for i in range(len(decoded)):
            print(' '.join(map(lambda x: token_dict_rev[x], decoded[i][1:-1])))
        for i in range(len(decoded)):
            self.assertTrue(len(decoded[i]) <= 4, decoded[i])
            for j in range(len(decoded[i])):
                self.assertEqual(decoder_inputs[i][j], decoded[i][j], decoded)

        decoded_top_5 = decode(
            model,
            encoder_inputs_no_padding,
            start_token=token_dict['<START>'],
            end_token=token_dict['<END>'],
            pad_token=token_dict['<PAD>'],
            max_len=4,
            top_k=5,
            temperature=1e-10,
        )
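        # With a near-zero temperature, top-k sampling collapses onto the argmax,
        # so the result should match plain greedy decoding.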
        has_diff = False
        for i in range(len(decoded)):
            s1 = ' '.join(map(lambda x: token_dict_rev[x], decoded[i][1:-1]))
            s5 = ' '.join(
                map(lambda x: token_dict_rev[x], decoded_top_5[i][1:-1]))
            if s1 != s5:
                has_diff = True
        self.assertFalse(has_diff)

        decoded_top_5 = decode(
            model,
            encoder_inputs_no_padding,
            start_token=token_dict['<START>'],
            end_token=token_dict['<END>'],
            pad_token=token_dict['<PAD>'],
            max_len=4,
            top_k=5,
        )
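        # With the default temperature, top-k sampling is stochastic, so at least
        # one of the inputs is expected to decode differently from greedy decoding.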
        has_diff = False
        for i in range(len(decoded)):
            s1 = ' '.join(map(lambda x: token_dict_rev[x], decoded[i][1:-1]))
            s5 = ' '.join(
                map(lambda x: token_dict_rev[x], decoded_top_5[i][1:-1]))
            if s1 != s5:
                has_diff = True
        self.assertTrue(has_diff)
Example #19
]
print(output_decoded[120000])

### CREATE the transformer network ###
# token_num: maximum vocabulary size across the English and Spanish dictionaries (25269)
# embed_dim: input embedding size (32)
# encoder_num: number of encoder blocks
# decoder_num: number of decoder blocks
# head_num: number of attention heads, which capture relations between phrases
# hidden_dim: hidden layer of 128 units
# dropout_rate: randomly drop units with probability 0.05 to reduce overfitting
model = get_model(
    token_num=max(len(source_token_dict), len(target_token_dict)),
    embed_dim=32,
    encoder_num=2,
    decoder_num=2,
    head_num=8,
    hidden_dim=128,
    dropout_rate=0.05,
    use_same_embed=False,
)
model.compile('adam', 'sparse_categorical_crossentropy')
# Model summary
model.summary()

# Train the model
x = [np.array(encoder_input), np.array(decoder_input)]
y = np.array(output_decoded)
# Train for 30 epochs with batches of 32 sentences
#model.fit(x,y, epochs=30, batch_size=32)

# Save the model
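The save call itself is cut off; a one-line sketch, assuming the weights file that Example #9 loads back is the intended target:

model.save_weights('translator_preentrenado.h5')  # hypothetical file name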
Example #20
def train(
    use_checkpoint=True,
    initial_epoch=0,
):
    if use_checkpoint:
        transformer_model = keras_transformer.get_model(
            token_num=32000,
            embed_dim=768,
            encoder_num=4,
            decoder_num=4,
            head_num=8,
            hidden_dim=512,
            attention_activation='relu',
            feed_forward_activation='relu',
            dropout_rate=0.1,
        )
        transformer_model.load_weights(
            './data/checkpoint/transformer_onbert_model-Adam4000-Dall.ckpt')
    else:
        bert_model = keras_bert.load_trained_model_from_checkpoint(
            checkpoint_file=checkpoint_file_path, config_file=config_file_path)
        bert_weights = bert_model.get_layer(
            name='Embedding-Token').get_weights()[0]
        transformer_model = keras_transformer.get_model(
            token_num=32000,
            embed_dim=768,
            encoder_num=4,
            decoder_num=4,
            head_num=8,
            hidden_dim=512,
            attention_activation='relu',
            feed_forward_activation='relu',
            dropout_rate=0.1,
            embed_weights=bert_weights,
        )
    transformer_model.compile(
        optimizer=keras.optimizers.Adam(beta_2=0.98),
        #        optimizer=keras.optimizers.SGD(),
        #        optimizer='adam',
        loss=keras.losses.sparse_categorical_crossentropy,
        metrics=[keras.metrics.mae, keras.metrics.sparse_categorical_accuracy],
    )
    transformer_model.summary()
    tb = keras.callbacks.TensorBoard(
        log_dir='./data/log-adam-4000-Dall-onbert/')
    try:
        history = transformer_model.fit_generator(
            generator=_generator(),
            steps_per_epoch=100,
            epochs=1000,
            validation_data=_generator(),
            validation_steps=20,
            callbacks=[
                keras.callbacks.ModelCheckpoint(
                    './data/checkpoint/transformer_onbert_model-Adam4000-Dall.ckpt',
                    monitor='val_loss'),
                tb,
                keras.callbacks.LearningRateScheduler(_decay),
                PredictionCallback(generator_data[:2, 0], 30),
            ],
            initial_epoch=initial_epoch,
        )
    except KeyboardInterrupt:
        tb.writer.close()
Example #21
        decoder_inputs.append(decode_tokens)
        # decoder_outputs.append(output_tokens)
    return np.asarray(encoder_inputs), np.asarray(decoder_inputs)

seq1_input, seq2_input = gen_toy_data(s1s_train, s2s_train)
seq1_input_dev, seq2_input_dev = gen_toy_data(s1s_dev, s2s_dev)
seq1_input_test, seq2_input_test = gen_toy_data(s1s_test, s2s_test)

# Build the model
model = get_model(
    token_num=len(token_dict),
    embed_dim=300,
    encoder_num=3,
    decoder_num=2,
    head_num=6,
    hidden_dim=256,
    attention_activation='relu',
    feed_forward_activation='relu',
    dropout_rate=0.05,
    embed_weights=embed_matrix,
    embed_trainable=True,
)

def model_qa():
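    # Reuse the transformer's two token inputs and tap the normalized output of
    # the last decoder block for a token-level two-class head.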
    seq1_in = model.inputs[0]
    seq2_in = model.inputs[1]
    decode_layer = model.get_layer("Decoder-2-FeedForward-Norm").output
    final_rep = TimeDistributed(Dense(2, use_bias=False))(decode_layer)
    return Model(inputs=[seq1_in, seq2_in], outputs=final_rep)
    
model_qa = model_qa()