Example #1
0
    def load(self, dirpath):
        """ Loads a trained model from local disk, given the dirpath

            Parameters
            ----------
            dirpath : str
                a directory where model artifacts are saved.

            Returns
            -------
            self

            Raises
            ------
            ValueError
                if the directory does not exist, or any required artifact
                file (weights, params, preprocessor) is missing.
        """
        if not os.path.exists(dirpath):
            raise ValueError("Model directory not found: {:s}".format(dirpath))

        weights_file = os.path.join(dirpath, "weights.h5")
        params_file = os.path.join(dirpath, "params.json")
        preprocessor_file = os.path.join(dirpath, "preprocessor.pkl")

        # All three artifacts are required. The previous check combined the
        # tests with `or`, which only raised when *every* file was missing and
        # let a partially corrupted directory fall through to a later crash.
        if not (os.path.exists(weights_file) and
                os.path.exists(params_file) and
                os.path.exists(preprocessor_file)):
            raise ValueError("Model files may be corrupted, exiting")

        self.model_ = load_model(weights_file, params_file)
        self.preprocessor_ = IndexTransformer.load(preprocessor_file)
        self.tagger_ = Tagger(self.model_, preprocessor=self.preprocessor_)

        return self
Example #2
0
def predict_with_folds(swa):
    """Average test-set predictions across all CV fold models and build a submission.

    Parameters
    ----------
    swa : bool
        when truthy, load the ``*_swa.h5`` checkpoints instead of the
        plain per-fold checkpoints.
    """
    test = pd.read_csv(config.data_folder + "test.csv",
                       converters={"pos": literal_eval})
    x_test = [sentence.split() for sentence in test['sentence'].tolist()]

    transformer = IndexTransformer(use_char=True)
    transformer = transformer.load('../models/best_transform.it')
    lengths = map(len, x_test)
    x_test = transformer.transform(x_test)

    per_fold_predictions = []
    for fold in range(config.nfolds):
        path = '../models/best_model_' + str(fold)
        if swa:
            path += '_swa'

        # Custom layers/optimizer/metrics must be registered for deserialization.
        custom = {'CRF': CRF,
                  'RAdam': RAdam,
                  'crf_loss': crf_loss,
                  'crf_viterbi_accuracy': crf_viterbi_accuracy}
        model = load_model(path + '.h5', custom_objects=custom)

        per_fold_predictions.append(model.predict(x_test, verbose=True))

    # Mean of per-fold probability tensors, then decode back to label strings.
    averaged = np.mean(per_fold_predictions, axis=0)
    labels = transformer.inverse_transform(averaged, lengths)
    build_submission(labels, 'fold')
Example #3
0
def load_and_predict():
    """Rebuild the BiLSTM-CRF architecture, load saved weights, and run prediction."""
    test = pd.read_csv(config.data_folder + "test.csv",
                       converters={"pos": literal_eval})
    x_test = [sent.split() for sent in test['sentence'].tolist()]

    preprocessor = IndexTransformer(use_char=True)
    preprocessor = preprocessor.load('../models/best_transform.it')

    # NOTE(review): these hyper-parameters presumably mirror the training
    # configuration — the weights only load if the architecture matches.
    network = BiLSTMCRF(char_vocab_size=preprocessor.char_vocab_size,
                        word_vocab_size=preprocessor.word_vocab_size,
                        num_labels=preprocessor.label_size,
                        word_embedding_dim=300,
                        char_embedding_dim=100,
                        word_lstm_size=100,
                        char_lstm_size=50,
                        fc_dim=100,
                        dropout=0.5,
                        embeddings=None,
                        use_char=True,
                        use_crf=True)

    model, loss = network.build()
    model.load_weights('../models/' + 'best_model.h5')

    predict(model, preprocessor, x_test)
Example #4
0
    def load(cls, weights_file, params_file, preprocessor_file):
        """Alternate constructor: restore a saved model and its preprocessor.

        Parameters
        ----------
        weights_file : str
            path to the saved model weights.
        params_file : str
            path to the saved model parameters.
        preprocessor_file : str
            path to the pickled preprocessor.

        Returns
        -------
        a new instance with ``model`` and ``p`` populated.
        """
        instance = cls()
        instance.model = load_model(weights_file, params_file)
        # Pre-build the predict function right after loading; works around the
        # "<tensor> is not an element of this graph." error seen when loading
        # the model (fix added by Sonvx on Jan 14, 2021).
        instance.model._make_predict_function()
        instance.p = IndexTransformer.load(preprocessor_file)
        return instance
Example #5
0
 def __init__(self, process_proper_nouns=False):
     """Build a POS tagger from the artifacts stored under ELMO_TAGGER_PATH."""
     super().__init__(process_proper_nouns)
     weights_path = os.path.join(ELMO_TAGGER_PATH, 'weights.h5')
     params_path = os.path.join(ELMO_TAGGER_PATH, 'params.json')
     loaded_model = load_model(weights_path, params_path)
     preprocessor = IndexTransformer.load(
         os.path.join(ELMO_TAGGER_PATH, 'preprocessor.pkl'))
     # Tokenize with wordpunct_tokenize so punctuation becomes separate tokens.
     self.pos_tagger = Tagger(loaded_model,
                              preprocessor=preprocessor,
                              tokenizer=wordpunct_tokenize)
Example #6
0
def main(args):
    """Load model artifacts named on the command line and tag one sentence.

    Expects ``args`` to carry ``weights_file``, ``params_file``,
    ``preprocessor_file`` and ``sent`` attributes.
    """
    print('Loading objects...')
    model = BiLSTMCRF.load(args.weights_file, args.params_file)
    preprocessor = IndexTransformer.load(args.preprocessor_file)
    tagger = Tagger(model, preprocessor=preprocessor)

    print('Tagging a sentence...')
    pprint(tagger.analyze(args.sent))
    def setUpClass(cls):
        """One-time test fixture: load saved artifacts and build a shared tagger."""
        weights_file = os.path.join(SAVE_ROOT, 'weights.h5')
        params_file = os.path.join(SAVE_ROOT, 'params.json')
        preprocessor_file = os.path.join(SAVE_ROOT, 'preprocessor.pickle')

        preprocessor = IndexTransformer.load(preprocessor_file)
        model = load_model(weights_file, params_file)

        # Shared across tests via class attributes.
        cls.tagger = anago.Tagger(model, preprocessor=preprocessor)
        cls.sent = 'President Obama is speaking at the White House.'
Example #8
0
    def test_save_and_load(self):
        """A transformer saved to disk and reloaded must transform identically."""
        transformer = IndexTransformer(lower=False)
        x_before, y_before = transformer.fit_transform(self.x, self.y)
        word_before, char_before, length_before = x_before

        # File must not pre-exist, and must exist after save().
        self.assertFalse(os.path.exists(self.preprocessor_file))
        transformer.save(self.preprocessor_file)
        self.assertTrue(os.path.exists(self.preprocessor_file))

        restored = IndexTransformer.load(self.preprocessor_file)
        x_after, y_after = restored.transform(self.x, self.y)
        word_after, char_after, length_after = x_after

        np.testing.assert_array_equal(word_before, word_after)
        np.testing.assert_array_equal(char_before, char_after)
        np.testing.assert_array_equal(y_before, y_after)
Example #9
0
def evaluate(swa):
    """Compute the out-of-fold balanced accuracy over all CV fold models.

    Parameters
    ----------
    swa : bool
        when truthy, load the ``*_swa.h5`` checkpoints instead of the
        plain per-fold checkpoints.
    """
    train = pd.read_csv(config.data_folder + "train.csv",
                        converters={"pos": literal_eval, "tag": literal_eval})
    x_train = [x.split() for x in train['sentence'].tolist()]
    y_train = train['tag'].tolist()

    p = IndexTransformer(use_char=True)
    p = p.load('../models/best_transform.it')

    oof_data = []
    oof_data_pred = []

    skf = KFold(n_splits=config.nfolds, random_state=config.seed, shuffle=True)

    for n_fold, (train_indices, val_indices) in enumerate(skf.split(x_train)):
        # Select by plain list indexing instead of np.array(x_train)[val_indices]:
        # sentences have different lengths, and constructing a ragged ndarray
        # raises ValueError on NumPy >= 1.24 (and silently built a slow
        # object array before that).
        x_val = [x_train[i] for i in val_indices]
        y_val = [y_train[i] for i in val_indices]
        print(y_val[:5])
        oof_data.extend([x for line in y_val for x in line])
        print(oof_data[:5])
        lengths = map(len, x_val)
        x_val = p.transform(x_val)

        path = '../models/best_model_' + str(n_fold)
        if swa:
            path += '_swa'

        # Custom layers/optimizer/metrics must be registered for deserialization.
        model = load_model(path + '.h5',
                           custom_objects={'CRF': CRF,
                                           'RAdam': RAdam,
                                           'crf_loss': crf_loss,
                                           'crf_viterbi_accuracy': crf_viterbi_accuracy})

        y_pred = model.predict(x_val,
                               verbose=True)
        print(y_pred[:5])
        y_pred = p.inverse_transform(y_pred, lengths)
        print(y_pred[:5])
        oof_data_pred.extend([pred for line in y_pred for pred in line])
        print(oof_data_pred[:5])

    bacc = balanced_accuracy_score(oof_data, oof_data_pred)
    print("Final CV: ", bacc * 100)
Example #10
0
    def load(cls, weights_file, params_file, preprocessor_file):
        """Alternate constructor: restore a saved model and preprocessor from disk.

        Parameters
        ----------
        weights_file : str
            path to the saved model weights.
        params_file : str
            path to the saved model parameters.
        preprocessor_file : str
            path to the pickled preprocessor.

        Returns
        -------
        a new instance with ``model`` and ``p`` populated.
        """
        instance = cls()
        instance.model = load_model(weights_file, params_file)
        instance.p = IndexTransformer.load(preprocessor_file)
        return instance