Example #1
def start_attention(name, file_name=None):
    limit_gpu_memory()
    # read data
    path = os.path.join(PATHS['data_dir'], 'summation_data.pickle')
    dataset = pickle.load(open(path, "rb"))

    # create model
    print("Creating model...")
    model = create_model_attention(PREPROCESSING_PARAMS,
                                   HPARAMS,
                                   for_inference=False,
                                   use_embedding=False,
                                   bilstm=True)
    print("Compiling model...")
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    if file_name is not None:
        print("Loading model's weights...")
        model.load_weights(os.path.join(PATHS['models_dir'], file_name))

    # Callbacks
    tensorboard = TensorBoard(log_dir="{}/{}".format(PATHS['log_dir'], name),
                              write_grads=True,
                              write_graph=True,
                              write_images=True)
    file_path = os.path.join(PATHS['models_dir'], name)
    checkpoint = ModelCheckpoint(file_path + "-{epoch:02d}-{val_loss:.2f}.h5",
                                 verbose=1,
                                 period=10)

    model.fit(
        x=dataset[0],
        y=dataset[1],
        batch_size=128,
        validation_split=0.05,
        shuffle=True,
        epochs=HPARAMS['num_epochs'],
        verbose=1,
        callbacks=[tensorboard, checkpoint],
    )
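Each example calls limit_gpu_memory() before touching the GPU, but its body is not shown. Below is a minimal sketch of such a helper, assuming TensorFlow 1.x with the standalone Keras backend; the fraction parameter is an assumption, not part of the original code.

import tensorflow as tf
from keras import backend as K


def limit_gpu_memory(fraction=0.5):
    # Hypothetical sketch: cap the share of GPU memory TensorFlow may
    # allocate and let the allocation grow on demand (TF 1.x API).
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = fraction
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))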
Example #2
def start_attention_gen(name):
    limit_gpu_memory()

    # create model
    print("Creating model...")
    model = create_model_attention(PREPROCESSING_PARAMS,
                                   HPARAMS,
                                   for_inference=False,
                                   use_embedding=False)
    print("Compiling model...")
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    # Callbacks
    tensorboard = TensorBoard(log_dir="{}/{}".format(PATHS['log_dir'], name),
                              write_grads=True,
                              write_graph=True,
                              write_images=True)
    file_path = os.path.join(PATHS['models_dir'], name)
    checkpoint = ModelCheckpoint(file_path + "-{epoch:02d}-{val_loss:.2f}.h5",
                                 verbose=1,
                                 period=10)

    train_gen = generate_batch(HPARAMS['batch_size'], HPARAMS['hidden_units'])
    test_gen = generate_batch(HPARAMS['batch_size'], HPARAMS['hidden_units'])

    train_num_batches = 1000
    test_num_batches = 100

    model.fit_generator(
        generator=train_gen,
        steps_per_epoch=train_num_batches,
        epochs=HPARAMS['num_epochs'],
        verbose=1,
        validation_data=test_gen,
        validation_steps=test_num_batches,
        callbacks=[tensorboard, checkpoint],
    )
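fit_generator expects generate_batch to yield (inputs, targets) pairs indefinitely; its definition is not shown here, and the meaning of its second argument is unclear from the call site. A minimal sketch under assumed shapes, with a toy sequence-reversal target standing in for the real summation data:

import numpy as np


def generate_batch(batch_size, seq_length, num_tokens=12):
    # Hypothetical sketch: one-hot sequences over num_tokens symbols.
    # Keras generators must loop forever.
    while True:
        ids = np.random.randint(num_tokens, size=(batch_size, seq_length))
        x = np.zeros((batch_size, seq_length, num_tokens), dtype='float32')
        x[np.arange(batch_size)[:, None], np.arange(seq_length), ids] = 1.0
        yield x, x[:, ::-1, :]  # toy target: the reversed sequence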
Example #3
def do_inference_attention(file_name=None):
    limit_gpu_memory()
    if file_name is not None:
        path = os.path.join(PATHS['models_dir'], file_name)
    else:
        path = PATHS["model"]

    model = create_model_attention(PREPROCESSING_PARAMS,
                                   HPARAMS,
                                   use_embedding=False,
                                   for_inference=False)
    model.load_weights(path)

    finished = False
    while not finished:
        text = input("Input text (to finish enter 'f'): ")
        if text == 'f':
            finished = True
            continue
        replies = reply_attention(text, model, PREPROCESSING_PARAMS, HPARAMS)
        # replies_without_unk = [r for r in replies if PREPROCESSING_PARAMS['unk'] not in r]
        # print(len(replies_without_unk))
        # for r in replies_without_unk:
        print(replies)
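A typical invocation, using a hypothetical checkpoint name that matches the "-{epoch:02d}-{val_loss:.2f}.h5" pattern written by ModelCheckpoint in the training examples above:

if __name__ == '__main__':
    # 'attention-50-0.12.h5' is a made-up file name; substitute a checkpoint
    # actually present in PATHS['models_dir'].
    do_inference_attention(file_name='attention-50-0.12.h5')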
Example #4
def train_model():
    # Vectorize the data.
    input_texts = []
    target_texts = []
    input_characters = set()
    target_characters = set()
    with open(data_path, 'r', encoding='utf-8') as f:
        lines = f.read().split('\n')
    for line in lines[:min(num_samples, len(lines) - 1)]:
        input_text, target_text = line.split('\t')
        # We use "tab" as the "start sequence" character
        # for the targets, and "\n" as "end sequence" character.
        target_text = '\t' + target_text + '\n'
        input_texts.append(input_text)
        target_texts.append(target_text)
        input_characters.update(input_text)  # set.update deduplicates
        target_characters.update(target_text)

    input_characters = sorted(input_characters)
    target_characters = sorted(target_characters)
    num_encoder_tokens = len(input_characters)
    num_decoder_tokens = len(target_characters)
    max_encoder_seq_length = max(len(txt) for txt in input_texts)
    max_decoder_seq_length = max(len(txt) for txt in target_texts)

    print('Number of samples:', len(input_texts))
    print('Number of unique input tokens:', num_encoder_tokens)
    print('Number of unique output tokens:', num_decoder_tokens)
    print('Max sequence length for inputs:', max_encoder_seq_length)
    print('Max sequence length for outputs:', max_decoder_seq_length)

    input_token_index = {char: i for i, char in enumerate(input_characters)}
    target_token_index = {char: i for i, char in enumerate(target_characters)}

    encoder_input_data = np.zeros(
        (len(input_texts), max_encoder_seq_length, num_encoder_tokens),
        dtype='float32')
    decoder_input_data = np.zeros(
        (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
        dtype='float32')
    decoder_target_data = np.zeros(
        (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
        dtype='float32')

    for i, (input_text,
            target_text) in enumerate(zip(input_texts, target_texts)):
        for t, char in enumerate(input_text):
            encoder_input_data[i, t, input_token_index[char]] = 1.
        for t, char in enumerate(target_text):
            # decoder_target_data is ahead of decoder_input_data by one timestep
            decoder_input_data[i, t, target_token_index[char]] = 1.
            if t > 0:
                # decoder_target_data will be ahead by one timestep
                # and will not include the start character.
                decoder_target_data[i, t - 1, target_token_index[char]] = 1.

    encoder_lstm = LSTM(units=hidden_units,
                        return_state=True,
                        name='encoder_lstm')

    decoder_lstm = LSTM(units=hidden_units,
                        return_state=True,
                        return_sequences=True,
                        name='decoder_lstm')

    # model structure
    encoder_inputs = Input(shape=(max_encoder_seq_length, num_encoder_tokens),
                           name='encoder_inputs')
    encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(
        encoder_inputs)
    encoder_states = [encoder_state_h, encoder_state_c]

    decoder_inputs = Input(shape=(max_decoder_seq_length, num_decoder_tokens),
                           name='decoder_inputs')
    decoder_outputs_lstm, _, _ = decoder_lstm(decoder_inputs,
                                              initial_state=encoder_states)
    decoder_dense = Dense(units=num_decoder_tokens,
                          activation='softmax',
                          name='decoder_dense')
    decoder_outputs = decoder_dense(decoder_outputs_lstm)

    # full model (for training)
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

    limit_gpu_memory()
    # Run training
    model.compile(optimizer='adam', loss='categorical_crossentropy')

    checkpoint = ModelCheckpoint("weights2.{epoch:02d}-{val_loss:.2f}.h5",
                                 verbose=1,
                                 period=10)
    model.fit([encoder_input_data, decoder_input_data],
              decoder_target_data,
              batch_size=batch_size,
              epochs=epochs,
              validation_split=0.1,
              callbacks=[checkpoint])

    return model
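Example #5 below consumes encoder_model and decoder_model from a load_inference_model() helper whose body is not shown. A sketch of how such models are typically rebuilt from the trained layers, following the standard Keras seq2seq inference pattern (the function and variable names here are assumptions):

from keras.layers import Input
from keras.models import Model


def build_inference_models(encoder_inputs, encoder_states, decoder_lstm,
                           decoder_dense, hidden_units, num_decoder_tokens):
    # Encoder: map a full input sequence to its final (h, c) LSTM states.
    encoder_model = Model(encoder_inputs, encoder_states)

    # Decoder: reuse the trained decoder_lstm, seeding it with externally
    # supplied states and returning the updated states for the next step.
    state_input_h = Input(shape=(hidden_units,))
    state_input_c = Input(shape=(hidden_units,))
    decoder_inputs = Input(shape=(None, num_decoder_tokens))
    outputs, state_h, state_c = decoder_lstm(
        decoder_inputs, initial_state=[state_input_h, state_input_c])
    outputs = decoder_dense(outputs)
    decoder_model = Model([decoder_inputs, state_input_h, state_input_c],
                          [outputs, state_h, state_c])
    return encoder_model, decoder_model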
Example #5
        if (sampled_char == '\n'
                or len(decoded_sentence) > max_decoder_seq_length):
            stop_condition = True

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, sampled_token_index] = 1.

        # Update states
        states_value = [h, c]

    return decoded_sentence


if __name__ == '__main__':
    limit_gpu_memory()
    max_encoder_seq_length = 14
    max_decoder_seq_length = 60
    num_encoder_tokens = 72
    num_decoder_tokens = 92

    (reverse_input_char_index, reverse_target_char_index, encoder_input_data,
     input_texts, target_token_index) = prepare_data()
    encoder_model, decoder_model = load_inference_model()

    for seq_index in range(5000, 5500):
        # Take one sequence (part of the training set)
        # for trying out decoding.
        input_seq = encoder_input_data[seq_index:seq_index + 1]
        decoded_sentence = decode_sequence(input_seq, num_decoder_tokens,
                                           target_token_index,