示例#1
0
    def save(self, dirpath):
        """ Saves model to local disk, given a dirpath

            Parameters
            ----------
            dirpath : str
                a directory where model artifacts will be saved. It is
                created (including missing parent directories) when it does
                not exist yet. Four files are written into it:
                weights.h5 and params.json (both paths are handed to
                save_model), preprocessor.pkl (written by the preprocessor's
                own save method) and params.yaml (the output of
                get_params(), written by write_param_file).

            Returns
            -------
            None

            Raises
            ------
            ValueError
                if either the model or the preprocessor is missing, i.e.
                nothing has been fitted or loaded yet.
        """
        if self.model_ is None or self.preprocessor_ is None:
            raise ValueError("No model artifacts to save, either run fit() to train or load() a trained model")

        # exist_ok=True replaces the former exists()-then-makedirs() pair,
        # which could fail if the directory appeared between the two calls.
        os.makedirs(dirpath, exist_ok=True)

        weights_file = os.path.join(dirpath, "weights.h5")
        params_file = os.path.join(dirpath, "params.json")
        preprocessor_file = os.path.join(dirpath, "preprocessor.pkl")

        save_model(self.model_, weights_file, params_file)
        self.preprocessor_.save(preprocessor_file)

        # The estimator-level parameters go to a separate YAML file next to
        # the other artifacts.
        write_param_file(self.get_params(), os.path.join(dirpath, "params.yaml"))
示例#2
0
def main(args):
    """Train a BiLSTMCRF model from command-line style options and save it.

    Parameters
    ----------
    args : object
        options holder; the attributes read here are train_data,
        valid_data, test_data, no_char_feature, no_use_crf, char_emb_size,
        word_emb_size, char_lstm_units, word_lstm_units, dropout,
        max_epoch, weights_file, params_file and preprocessor_file.

    Returns
    -------
    None
    """
    print('Loading dataset...')
    x_train, y_train = load_data_and_labels(args.train_data)
    x_valid, y_valid = load_data_and_labels(args.valid_data)
    x_test, y_test = load_data_and_labels(args.test_data)
    # The train, validation and test splits are stacked into one training
    # set; the validation split is still passed separately to the trainer.
    x_all = np.r_[x_train, x_valid, x_test]
    y_all = np.r_[y_train, y_valid, y_test]

    print('Transforming datasets...')
    preprocessor = IndexTransformer(use_char=args.no_char_feature)
    preprocessor.fit(x_all, y_all)

    print('Building a model.')
    # Vocabulary and label sizes come from the fitted preprocessor, the
    # remaining hyperparameters from the options.
    hyperparams = dict(
        char_embedding_dim=args.char_emb_size,
        word_embedding_dim=args.word_emb_size,
        char_lstm_size=args.char_lstm_units,
        word_lstm_size=args.word_lstm_units,
        char_vocab_size=preprocessor.char_vocab_size,
        word_vocab_size=preprocessor.word_vocab_size,
        num_labels=preprocessor.label_size,
        dropout=args.dropout,
        use_char=args.no_char_feature,
        use_crf=args.no_use_crf,
    )
    model, loss = BiLSTMCRF(**hyperparams).build()
    model.compile(loss=loss, optimizer='adam')

    print('Training the model...')
    Trainer(model, preprocessor=preprocessor).train(
        x_all, y_all, x_valid, y_valid, epochs=args.max_epoch
    )

    print('Saving the model...')
    save_model(model, args.weights_file, args.params_file)
    preprocessor.save(args.preprocessor_file)
示例#3
0
    def test_save(self):
        # Fit the fixture model on the fixture training data first.
        Trainer(self.model, preprocessor=self.p).train(self.x_train, self.y_train)

        # Then write the model (weights + params) and the preprocessor to
        # the paths prepared by the fixture.
        save_model(self.model, self.weights_file, self.params_file)
        self.p.save(self.preprocessor_file)
示例#4
0
    def test_save_and_load(self):
        # Build a small model; only the built model is needed, the loss
        # returned alongside it is ignored.
        network = BiLSTMCRF(char_vocab_size=100,
                            word_vocab_size=10000,
                            num_labels=10)
        built_model, _ = network.build()

        artifact_paths = (self.weights_file, self.params_file)

        # Neither artifact may exist before saving.
        for path in artifact_paths:
            self.assertFalse(os.path.exists(path))

        save_model(built_model, self.weights_file, self.params_file)

        # Both artifacts must exist afterwards.
        for path in artifact_paths:
            self.assertTrue(os.path.exists(path))

        # Loading must complete without raising; the result is not inspected.
        restored = load_model(self.weights_file, self.params_file)
示例#5
0
 def save(self, weights_file, params_file, preprocessor_file):
     """Write the preprocessor and the model to the given paths.

     The preprocessor is saved first, to preprocessor_file; the model is
     then passed to save_model together with weights_file and params_file.
     """
     preprocessor = self.p
     preprocessor.save(preprocessor_file)
     save_model(self.model, weights_file, params_file)
示例#6
0
p.fit(x_train, y_train)

print('Loading word embeddings...')
# The vectors loaded from EMBEDDING_PATH are passed through
# filter_embeddings together with the fitted word vocabulary
# (presumably to keep only in-vocabulary rows — confirm against the helper).
embeddings = filter_embeddings(load_glove(EMBEDDING_PATH), p._word_vocab.vocab, EMBEDDING_DIM)

print('Building a model.')
# The word LSTM size is set to the embedding dimension; vocabulary and label
# sizes come from the fitted preprocessor.
model_config = dict(
    char_embedding_dim=32,
    word_embedding_dim=EMBEDDING_DIM,
    char_lstm_size=32,
    word_lstm_size=EMBEDDING_DIM,
    char_vocab_size=p.char_vocab_size,
    word_vocab_size=p.word_vocab_size,
    num_labels=p.label_size,
    embeddings=embeddings,
)
model, loss = ELModel(**model_config).build()
model.compile(loss=loss, optimizer='adam')

print('Training the model...')
trainer = Trainer(model, preprocessor=p)
training_callbacks = [
    TensorBoard(log_dir=log_dir, write_graph=False),
    ModelCheckpoint(weights_path, save_weights_only=True),
    ReduceLROnPlateau(),
    EarlyStopping(patience=EARLY_STOP),
]
# x_test / y_test are passed as the third and fourth positional arguments
# (presumably the validation data — confirm against Trainer.train).
trainer.train(x_train, y_train, x_test, y_test, callbacks=training_callbacks)

print('Saving the model...')
# All final artifacts are written into log_dir.
weights_out = os.path.join(log_dir, 'weights.h5')
params_out = os.path.join(log_dir, 'params.json')
save_model(model, weights_out, params_out)
p.save(os.path.join(log_dir, 'preprocessor.pkl'))
# Disabled alternative: model.save('weights.h5', 'params.json')