def train(self, x_train, y_train, x_valid=None, y_valid=None,
          epochs=1, batch_size=32, verbose=1, callbacks=None, shuffle=True):
    """Trains the model for a fixed number of epochs (iterations on a dataset).

    Args:
        x_train: list of training data.
        y_train: list of training target (label) data.
        x_valid: list of validation data.
        y_valid: list of validation target (label) data.
        batch_size: Integer. Number of samples per gradient update.
            If unspecified, `batch_size` will default to 32.
        epochs: Integer. Number of epochs to train the model.
        verbose: Integer. 0, 1, or 2. Verbosity mode.
            0 = silent, 1 = progress bar, 2 = one line per epoch.
        callbacks: List of `keras.callbacks.Callback` instances.
            List of callbacks to apply during training.
        shuffle: Boolean. Whether to shuffle the training data before
            each epoch. Defaults to True.
    """
    # Wrap the raw training data in a Sequence that preprocesses each mini-batch.
    train_seq = NERSequence(x_train, y_train, batch_size, self._preprocessor.transform)

    if x_valid and y_valid:
        # Validation is handled by the F1score callback rather than Keras' own
        # validation loop, with early stopping on the training loss.
        valid_seq = NERSequence(x_valid, y_valid, batch_size, self._preprocessor.transform)
        f1 = F1score(valid_seq, preprocessor=self._preprocessor)
        es = EarlyStopping(monitor='loss', mode='min', verbose=1)
        callbacks = [f1, es] + callbacks if callbacks else [f1, es]

    self._model.fit_generator(generator=train_seq,
                              epochs=epochs,
                              callbacks=callbacks,
                              verbose=verbose,
                              shuffle=shuffle)
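The NERSequence wrapper used above is not defined in this section. A minimal sketch of such a `keras.utils.Sequence` subclass, assuming the preprocessor's `transform(x, y)` pads and vectorizes one mini-batch, could look like this (names and behavior are assumptions, not the project's actual implementation):

import math
from keras.utils import Sequence


class NERSequence(Sequence):
    """Serves preprocessed (features, labels) mini-batches to fit_generator."""

    def __init__(self, x, y, batch_size=1, preprocess=None):
        self.x = x
        self.y = y
        self.batch_size = batch_size
        self.preprocess = preprocess

    def __getitem__(self, idx):
        # Slice one mini-batch and run it through the preprocessor.
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
        return self.preprocess(batch_x, batch_y)

    def __len__(self):
        # Number of batches per epoch.
        return math.ceil(len(self.x) / self.batch_size)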
def train(self, x_train, y_train, x_valid=None, y_valid=None,
          epochs=1, batch_size=32, verbose=1, callbacks=None, shuffle=True):
    """Trains the model for a fixed number of epochs (iterations on a dataset).

    Args:
        x_train: list of training data.
        y_train: list of training target (label) data.
        x_valid: list of validation data.
        y_valid: list of validation target (label) data.
        batch_size: Integer. Number of samples per gradient update.
            If unspecified, `batch_size` will default to 32.
        epochs: Integer. Number of epochs to train the model.
        verbose: Integer. 0, 1, or 2. Verbosity mode.
            0 = silent, 1 = progress bar, 2 = one line per epoch.
        callbacks: List of `keras.callbacks.Callback` instances.
            List of callbacks to apply during training.
        shuffle: Boolean. Whether to shuffle the training data before
            each epoch. Defaults to True.
    """
    # Prepare training and validation data (steps, generator).
    train_steps, train_generator = batch_iter(x_train, y_train, batch_size,
                                              shuffle=shuffle,
                                              preprocessor=self._preprocessor)

    if x_valid and y_valid:
        valid_steps, valid_generator = batch_iter(x_valid, y_valid, batch_size,
                                                  shuffle=False,
                                                  preprocessor=self._preprocessor)
        f1 = F1score(valid_steps, valid_generator, preprocessor=self._preprocessor)
        callbacks = [f1] + callbacks if callbacks else [f1]

    # Train the model.
    self._model.fit_generator(generator=train_generator,
                              steps_per_epoch=train_steps,
                              epochs=epochs,
                              callbacks=callbacks,
                              verbose=verbose)
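The `batch_iter` helper referenced in this second variant is likewise not shown. A minimal sketch, assuming it returns a (steps, generator) pair whose generator loops forever over preprocessed batches (signature and internals are assumptions):

import math
import numpy as np


def batch_iter(data, labels, batch_size, shuffle=True, preprocessor=None):
    """Return (steps_per_epoch, infinite batch generator) for fit_generator."""
    num_batches_per_epoch = int(math.ceil(len(data) / batch_size))

    def data_generator():
        while True:
            # Reshuffle example order at the start of every epoch if requested.
            indices = np.random.permutation(len(data)) if shuffle else np.arange(len(data))
            for i in range(num_batches_per_epoch):
                batch_idx = indices[i * batch_size:(i + 1) * batch_size]
                x_batch = [data[j] for j in batch_idx]
                y_batch = [labels[j] for j in batch_idx]
                if preprocessor is not None:
                    yield preprocessor.transform(x_batch, y_batch)
                else:
                    yield x_batch, y_batch

    return num_batches_per_epoch, data_generator()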
def training(train, test):
    # Tokenize sentences on whitespace; tags are already per-token label lists.
    x_train = [x.split() for x in train['sentence'].tolist()]
    y_train = train['tag'].tolist()

    # Hold out 20% of the training data for validation.
    x_train, x_val, y_train, y_val = train_test_split(x_train, y_train,
                                                      train_size=0.8,
                                                      random_state=233)

    print('Transforming datasets...')
    p = IndexTransformer(use_char=True)
    p.fit(x_train, y_train)

    # Restrict the GloVe embedding matrix to the vocabulary seen during fitting.
    embeddings = load_glove(config.glove_file)
    embeddings = filter_embeddings(embeddings, p._word_vocab.vocab, config.glove_size)

    model = BiLSTMCRF(char_vocab_size=p.char_vocab_size,
                      word_vocab_size=p.word_vocab_size,
                      num_labels=p.label_size,
                      word_embedding_dim=300,
                      char_embedding_dim=100,
                      word_lstm_size=100,
                      char_lstm_size=50,
                      fc_dim=100,
                      dropout=0.5,
                      embeddings=embeddings,
                      use_char=True,
                      use_crf=True)

    opt = Adam(lr=0.001)
    model, loss = model.build()
    model.compile(loss=loss, optimizer=opt, metrics=[crf_viterbi_accuracy])

    filepath = '../models/best_model'
    ckp = ModelCheckpoint(filepath + '.h5',
                          monitor='val_crf_viterbi_accuracy',
                          verbose=1, save_best_only=True, mode='max',
                          save_weights_only=True)
    es = EarlyStopping(monitor='val_crf_viterbi_accuracy',
                       min_delta=0.00001, patience=3, verbose=1, mode='max')
    rlr = ReduceLROnPlateau(monitor='val_crf_viterbi_accuracy',
                            factor=0.2, patience=2, verbose=1, mode='max',
                            min_delta=0.0001)
    callbacks = [ckp, es, rlr]

    train_seq = NERSequence(x_train, y_train, config.batch_size, p.transform)

    if x_val and y_val:
        valid_seq = NERSequence(x_val, y_val, config.batch_size, p.transform)
        f1 = F1score(valid_seq, preprocessor=p)
        callbacks.append(f1)

    model.fit_generator(generator=train_seq,
                        validation_data=valid_seq,
                        epochs=config.nepochs,
                        callbacks=callbacks,
                        verbose=1,
                        shuffle=True,
                        use_multiprocessing=True,
                        workers=42)
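A hypothetical invocation of this function, assuming the data arrives as pandas DataFrames with a whitespace-tokenizable 'sentence' column and per-token 'tag' lists stored as stringified lists (the CSV paths below are placeholders, not paths from the project):

import pandas as pd
from ast import literal_eval

train_df = pd.read_csv('../data/train.csv')            # placeholder path
test_df = pd.read_csv('../data/test.csv')              # placeholder path
train_df['tag'] = train_df['tag'].apply(literal_eval)  # parse tag lists from strings
training(train_df, test_df)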
def training(train, test, fold):
    x_train = [x.split() for x in train['sentence'].tolist()]
    y_train = train['tag'].tolist()
    x_test = [x.split() for x in test['sentence'].tolist()]

    print('Transforming datasets...')
    # Fit the vocabulary on train and test tokens so test words are not all OOV.
    p = IndexTransformer(use_char=True)
    p.fit(x_train + x_test, y_train)

    skf = KFold(n_splits=config.nfolds, random_state=config.seed, shuffle=True)

    # Concatenate GloVe and Wang2Vec vectors to form the word embedding matrix.
    embeddings = load_glove(config.glove_file)
    # embeddings_fast = load_glove(config.glove_file)
    embeddings_wang = load_glove(config.wang_file)
    embeddings = filter_embeddings(embeddings, p._word_vocab.vocab, config.glove_size)
    # embeddings_fast = filter_embeddings(embeddings_fast, p._word_vocab.vocab, config.fasttext_size)
    embeddings_wang = filter_embeddings(embeddings_wang, p._word_vocab.vocab, config.wang_size)
    embeddings = np.concatenate((embeddings, embeddings_wang), axis=1)

    for n_fold, (train_indices, val_indices) in enumerate(skf.split(x_train)):
        # Skip folds below the requested starting fold (allows resuming a run).
        if n_fold >= fold:
            print("Training fold: ", n_fold)
            x_val = list(np.array(x_train)[val_indices])
            y_val = list(np.array(y_train)[val_indices])
            x_train_spl = list(np.array(x_train)[train_indices])
            y_train_spl = list(np.array(y_train)[train_indices])

            model = BiLSTMCRF(char_vocab_size=p.char_vocab_size,
                              word_vocab_size=p.word_vocab_size,
                              num_labels=p.label_size,
                              word_embedding_dim=1200,
                              char_embedding_dim=50,
                              word_lstm_size=300,
                              char_lstm_size=300,
                              fc_dim=50,
                              dropout=0.5,
                              embeddings=embeddings,
                              use_char=True,
                              use_crf=True)

            opt = Adam(lr=0.001)
            model, loss = model.build()
            model.compile(loss=loss, optimizer=opt, metrics=[crf_viterbi_accuracy])

            es = EarlyStopping(monitor='val_crf_viterbi_accuracy',
                               patience=3, verbose=1, mode='max',
                               restore_best_weights=True)
            rlr = ReduceLROnPlateau(monitor='val_crf_viterbi_accuracy',
                                    factor=0.2, patience=2, verbose=1, mode='max')
            callbacks = [es, rlr]

            train_seq = NERSequence(x_train_spl, y_train_spl, config.batch_size, p.transform)

            if x_val and y_val:
                valid_seq = NERSequence(x_val, y_val, config.batch_size, p.transform)
                f1 = F1score(valid_seq, preprocessor=p, fold=n_fold)
                callbacks.append(f1)

            model.fit_generator(generator=train_seq,
                                validation_data=valid_seq,
                                epochs=config.nepochs,
                                callbacks=callbacks,
                                verbose=1,
                                shuffle=True,
                                use_multiprocessing=True,
                                workers=12)

            # Persist the preprocessor, reload the best weights saved for this
            # fold, and generate test predictions with them.
            p.save('../models/best_transform.it')
            model.load_weights('../models/best_model_' + str(n_fold) + '.h5')
            predict(model, p, x_test, n_fold)
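The `predict` helper called at the end of each fold is not defined in this section. A minimal sketch of what it could do, assuming the preprocessor exposes `transform(x)` and an `inverse_transform(probs, lengths)` that maps label probabilities back to tag strings; the batch size parameter and output path are assumptions:

import numpy as np


def predict(model, p, x_test, n_fold, batch_size=32):
    """Predict tag sequences for the tokenized test sentences of one fold."""
    y_pred = []
    for i in range(0, len(x_test), batch_size):
        batch = x_test[i:i + batch_size]
        lengths = [len(sent) for sent in batch]
        # Vectorize the batch, run the model, and decode back to tag strings.
        probs = model.predict_on_batch(p.transform(batch))
        y_pred.extend(p.inverse_transform(probs, lengths))
    # Store per-fold test predictions for later ensembling (path is a placeholder).
    np.save('../models/test_pred_fold_{}.npy'.format(n_fold),
            np.asarray(y_pred, dtype=object))
    return y_pred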