def start_attention(name, file_name=None):
    limit_gpu_memory()

    # read data
    path = os.path.join(PATHS['data_dir'], 'summation_data.pickle')
    with open(path, "rb") as f:
        dataset = pickle.load(f)

    # create model
    print("Creating model...")
    model = create_model_attention(PREPROCESSING_PARAMS, HPARAMS,
                                   for_inference=False, use_embedding=False, bilstm=True)

    print("Compiling model...")
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    if file_name is not None:
        print("Loading model's weights...")
        model.load_weights(os.path.join(PATHS['models_dir'], file_name))

    # Callbacks
    tensorboard = TensorBoard(log_dir="{}/{}".format(PATHS['log_dir'], name),
                              write_grads=True, write_graph=True, write_images=True)
    file_path = os.path.join(PATHS['models_dir'], name)
    checkpoint = ModelCheckpoint(file_path + "-{epoch:02d}-{val_loss:.2f}.h5",
                                 verbose=1, period=10)

    model.fit(
        x=dataset[0],
        y=dataset[1],
        batch_size=128,
        validation_split=0.05,
        shuffle=True,
        epochs=HPARAMS['num_epochs'],
        verbose=1,
        callbacks=[tensorboard, checkpoint],
    )

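
# NOTE: limit_gpu_memory() used above is defined elsewhere in this project.
# The sketch below shows a typical TF 1.x-style implementation of such a helper
# (enable on-demand GPU memory growth instead of grabbing all GPU memory up
# front); it is an assumption about the intent, not the project's actual code.
def limit_gpu_memory_sketch():
    import tensorflow as tf
    from keras import backend as K
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # allocate GPU memory only as needed
    K.set_session(tf.Session(config=config))
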
def start_attention_gen(name):
    limit_gpu_memory()

    # create model
    print("Creating model...")
    model = create_model_attention(PREPROCESSING_PARAMS, HPARAMS,
                                   for_inference=False, use_embedding=False)

    print("Compiling model...")
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Callbacks
    tensorboard = TensorBoard(log_dir="{}/{}".format(PATHS['log_dir'], name),
                              write_grads=True, write_graph=True, write_images=True)
    file_path = os.path.join(PATHS['models_dir'], name)
    checkpoint = ModelCheckpoint(file_path + "-{epoch:02d}-{val_loss:.2f}.h5",
                                 verbose=1, period=10)

    train_gen = generate_batch(HPARAMS['batch_size'], HPARAMS['hidden_units'])
    test_gen = generate_batch(HPARAMS['batch_size'], HPARAMS['hidden_units'])
    train_num_batches = 1000
    test_num_batches = 100

    model.fit_generator(
        generator=train_gen,
        steps_per_epoch=train_num_batches,
        epochs=HPARAMS['num_epochs'],
        verbose=1,
        validation_data=test_gen,
        validation_steps=test_num_batches,
        callbacks=[tensorboard, checkpoint],
    )

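
# NOTE: generate_batch() is defined elsewhere in the project; its signature
# suggests it builds batches of the summation data on the fly. The only contract
# fit_generator() imposes is an endless stream of (inputs, targets) batches.
# Below is a runnable stand-in that yields dummy random batches; the shapes and
# the [encoder_in, decoder_in] input structure are assumptions for illustration,
# not the project's actual data format.
def generate_batch_sketch(batch_size, seq_len=10, num_tokens=12):
    while True:
        encoder_in = np.random.rand(batch_size, seq_len, num_tokens).astype('float32')
        decoder_in = np.random.rand(batch_size, seq_len, num_tokens).astype('float32')
        # one-hot targets (in the real data these are shifted one step
        # relative to the decoder inputs, as in train_model() below)
        targets = np.eye(num_tokens, dtype='float32')[
            np.random.randint(num_tokens, size=(batch_size, seq_len))]
        yield [encoder_in, decoder_in], targets
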
def do_inference_attention(file_name=None):
    limit_gpu_memory()

    if file_name is not None:
        path = os.path.join(PATHS['models_dir'], file_name)
    else:
        path = PATHS["model"]

    model = create_model_attention(PREPROCESSING_PARAMS, HPARAMS,
                                   use_embedding=False, for_inference=False)
    model.load_weights(path)

    finished = False
    while not finished:
        text = input("Input text (to finish enter 'f'): ")
        if text == 'f':
            finished = True
            continue

        replies = reply_attention(text, model, PREPROCESSING_PARAMS, HPARAMS)
        # replies_without_unk = [r for r in replies if PREPROCESSING_PARAMS['unk'] not in r]
        # print(len(replies_without_unk))
        # for r in replies_without_unk:
        print(replies)

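
# Example driver for the interactive loop above (the checkpoint file name is
# hypothetical; substitute one produced by start_attention / start_attention_gen):
# if __name__ == '__main__':
#     do_inference_attention('my_attention_run-50-0.12.h5')
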
def train_model():
    # Vectorize the data.
    input_texts = []
    target_texts = []
    input_characters = set()
    target_characters = set()
    with open(data_path, 'r', encoding='utf-8') as f:
        lines = f.read().split('\n')
    for line in lines[:min(num_samples, len(lines) - 1)]:
        input_text, target_text = line.split('\t')
        # We use "tab" as the "start sequence" character
        # for the targets, and "\n" as "end sequence" character.
        target_text = '\t' + target_text + '\n'
        input_texts.append(input_text)
        target_texts.append(target_text)
        for char in input_text:
            if char not in input_characters:
                input_characters.add(char)
        for char in target_text:
            if char not in target_characters:
                target_characters.add(char)

    input_characters = sorted(list(input_characters))
    target_characters = sorted(list(target_characters))
    num_encoder_tokens = len(input_characters)
    num_decoder_tokens = len(target_characters)
    max_encoder_seq_length = max([len(txt) for txt in input_texts])
    max_decoder_seq_length = max([len(txt) for txt in target_texts])

    print('Number of samples:', len(input_texts))
    print('Number of unique input tokens:', num_encoder_tokens)
    print('Number of unique output tokens:', num_decoder_tokens)
    print('Max sequence length for inputs:', max_encoder_seq_length)
    print('Max sequence length for outputs:', max_decoder_seq_length)

    input_token_index = dict([(char, i) for i, char in enumerate(input_characters)])
    target_token_index = dict([(char, i) for i, char in enumerate(target_characters)])

    encoder_input_data = np.zeros(
        (len(input_texts), max_encoder_seq_length, num_encoder_tokens), dtype='float32')
    decoder_input_data = np.zeros(
        (len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32')
    decoder_target_data = np.zeros(
        (len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32')

    for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
        for t, char in enumerate(input_text):
            encoder_input_data[i, t, input_token_index[char]] = 1.
        for t, char in enumerate(target_text):
            # decoder_target_data is ahead of decoder_input_data by one timestep
            decoder_input_data[i, t, target_token_index[char]] = 1.
            if t > 0:
                # decoder_target_data will be ahead by one timestep
                # and will not include the start character.
                decoder_target_data[i, t - 1, target_token_index[char]] = 1.

    encoder_lstm = LSTM(units=hidden_units, return_state=True, name='encoder_lstm')
    decoder_lstm = LSTM(units=hidden_units, return_state=True,
                        return_sequences=True, name='decoder_lstm')

    # model structure
    encoder_inputs = Input(shape=(max_encoder_seq_length, num_encoder_tokens),
                           name='encoder_inputs')
    encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(encoder_inputs)
    encoder_states = [encoder_state_h, encoder_state_c]

    decoder_inputs = Input(shape=(max_decoder_seq_length, num_decoder_tokens),
                           name='decoder_inputs')
    decoder_outputs_lstm, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
    decoder_dense = Dense(units=num_decoder_tokens, activation='softmax',
                          name='decoder_dense')
    decoder_outputs = decoder_dense(decoder_outputs_lstm)

    # full model (for training)
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

    limit_gpu_memory()

    # Run training
    model.compile(optimizer='adam', loss='categorical_crossentropy')
    checkpoint = ModelCheckpoint("weights2.{epoch:02d}-{val_loss:.2f}.h5",
                                 verbose=1, period=10)
    model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
              batch_size=batch_size,
              epochs=epochs,
              validation_split=0.1,
              callbacks=[checkpoint])
    return model

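
# NOTE: load_inference_model() (used in the __main__ block below) is not shown in
# this section. The sketch below shows the standard Keras seq2seq sampling setup
# it presumably builds from the trained model, reusing the named encoder_lstm /
# decoder_lstm / decoder_dense layers; the function and variable names here are
# illustrative, not the project's actual code.
def build_inference_models_sketch(model, hidden_units, num_decoder_tokens):
    # Encoder: map the input sequence to its final LSTM states.
    encoder_inputs = model.inputs[0]
    _, enc_state_h, enc_state_c = model.get_layer('encoder_lstm').output
    encoder_model = Model(encoder_inputs, [enc_state_h, enc_state_c])

    # Decoder: run one timestep at a time, feeding the states back in.
    decoder_inputs = Input(shape=(None, num_decoder_tokens))
    state_input_h = Input(shape=(hidden_units,))
    state_input_c = Input(shape=(hidden_units,))
    decoder_outputs, state_h, state_c = model.get_layer('decoder_lstm')(
        decoder_inputs, initial_state=[state_input_h, state_input_c])
    decoder_outputs = model.get_layer('decoder_dense')(decoder_outputs)
    decoder_model = Model([decoder_inputs, state_input_h, state_input_c],
                          [decoder_outputs, state_h, state_c])
    return encoder_model, decoder_model
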
        if (sampled_char == '\n' or
                len(decoded_sentence) > max_decoder_seq_length):
            stop_condition = True

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, sampled_token_index] = 1.

        # Update states
        states_value = [h, c]

    return decoded_sentence


if __name__ == '__main__':
    limit_gpu_memory()
    max_encoder_seq_length = 14
    max_decoder_seq_length = 60
    num_encoder_tokens = 72
    num_decoder_tokens = 92

    reverse_input_char_index, reverse_target_char_index, encoder_input_data, input_texts, target_token_index = prepare_data()

    encoder_model, decoder_model = load_inference_model()

    for seq_index in range(5000, 5500):
        # Take one sequence (part of the training set)
        # for trying out decoding.
        input_seq = encoder_input_data[seq_index:seq_index + 1]
        decoded_sentence = decode_sequence(input_seq, num_decoder_tokens, target_token_index,