def test_inverse_transform_one_cat(self):
    """Round-trip a single-token, single-label sample through the transformer.

    The transformer is fitted on the fixture data (``self.x`` / ``self.y``),
    then a one-sentence sample is transformed and its labels decoded again.
    The decoded labels are asserted to differ from the fixture labels.
    """
    sample_tokens = [['a']]
    sample_labels = [['O']]

    transformer = IndexTransformer()
    transformer.fit(self.x, self.y)

    # Only the encoded labels are needed; the encoded features are discarded.
    _, encoded_labels = transformer.transform(sample_tokens, sample_labels)
    decoded_labels = transformer.inverse_transform(encoded_labels)

    self.assertNotEqual(decoded_labels, self.y)
def test_inverse_transform_unknown_token(self):
    """Round-trip a sample whose labels include ``'X'`` through the transformer.

    The transformer is fitted on the fixture data (``self.x`` / ``self.y``),
    then a two-token sample labelled ``['X', 'O']`` is transformed and its
    labels decoded again. The decoded labels are asserted to differ from the
    fixture labels.
    """
    sample_tokens = [['a', 'b']]
    sample_labels = [['X', 'O']]

    transformer = IndexTransformer()
    transformer.fit(self.x, self.y)

    # Only the encoded labels are needed; the encoded features are discarded.
    _, encoded_labels = transformer.transform(sample_tokens, sample_labels)
    decoded_labels = transformer.inverse_transform(encoded_labels)

    self.assertNotEqual(decoded_labels, self.y)
def predict_with_folds(swa):
    """Average the predictions of every fold model on the test set.

    Reads ``test.csv`` from ``config.data_folder``, encodes the whitespace-split
    sentences with the saved transformer, runs each of the ``config.nfolds``
    saved models on them, averages the raw predictions across folds, decodes
    the average and hands the result to ``build_submission`` with ``'fold'``
    as its second argument.

    Args:
        swa: When truthy, the ``_swa`` variant of each fold checkpoint is
            loaded instead of the plain one.
    """
    test_frame = pd.read_csv(
        config.data_folder + "test.csv",
        converters={"pos": literal_eval},
    )
    sentences = [sentence.split() for sentence in test_frame['sentence'].tolist()]

    transformer = IndexTransformer(use_char=True).load('../models/best_transform.it')

    # Token counts are taken from the raw sentences before they are encoded.
    sentence_lengths = map(len, sentences)
    features = transformer.transform(sentences)

    checkpoint_suffix = '_swa.h5' if swa else '.h5'
    per_fold_predictions = []
    for fold in range(config.nfolds):
        checkpoint = '../models/best_model_' + str(fold) + checkpoint_suffix
        network = load_model(
            checkpoint,
            custom_objects={
                'CRF': CRF,
                'RAdam': RAdam,
                'crf_loss': crf_loss,
                'crf_viterbi_accuracy': crf_viterbi_accuracy,
            },
        )
        per_fold_predictions.append(network.predict(features, verbose=True))

    averaged = np.mean(per_fold_predictions, axis=0)
    decoded = transformer.inverse_transform(averaged, sentence_lengths)
    build_submission(decoded, 'fold')
def evaluate(swa):
    """Print the out-of-fold balanced accuracy of the saved fold models.

    Reads ``train.csv`` from ``config.data_folder``, rebuilds a shuffled
    ``KFold`` split from ``config.nfolds`` / ``config.seed``, and for each fold
    loads the matching saved model, predicts on that fold's validation
    sentences and decodes the predictions. True and predicted tags are
    flattened across all folds and scored with ``balanced_accuracy_score``.

    NOTE(review): presumably this split matches the one used when the fold
    models were trained (same seed and fold count) - confirm against the
    training script, otherwise the score is not truly out-of-fold.

    Args:
        swa: When truthy, the ``_swa`` variant of each fold checkpoint is
            loaded instead of the plain one.
    """
    train = pd.read_csv(
        config.data_folder + "train.csv",
        converters={"pos": literal_eval, "tag": literal_eval},
    )
    x_train = [x.split() for x in train['sentence'].tolist()]
    y_train = train['tag'].tolist()

    p = IndexTransformer(use_char=True)
    p = p.load('../models/best_transform.it')

    oof_data = []
    oof_data_pred = []

    skf = KFold(n_splits=config.nfolds, random_state=config.seed, shuffle=True)
    for n_fold, (_, val_indices) in enumerate(skf.split(x_train)):
        # Index the Python lists directly. Sentences differ in length, and
        # building a NumPy array from ragged lists raises ValueError on
        # NumPy >= 1.24 (and would turn equal-length rows into ndarrays).
        x_val = [x_train[i] for i in val_indices]
        y_val = [y_train[i] for i in val_indices]
        print(y_val[:5])

        oof_data.extend([x for line in y_val for x in line])
        print(oof_data[:5])

        # Token counts must be captured before x_val is replaced by its
        # encoded form; they are passed to inverse_transform below.
        lengths = [len(sentence) for sentence in x_val]
        x_val = p.transform(x_val)

        path = '../models/best_model_' + str(n_fold)
        if swa:
            path += '_swa'

        model = load_model(
            path + '.h5',
            custom_objects={
                'CRF': CRF,
                'RAdam': RAdam,
                'crf_loss': crf_loss,
                'crf_viterbi_accuracy': crf_viterbi_accuracy,
            },
        )

        y_pred = model.predict(x_val, verbose=True)
        print(y_pred[:5])

        y_pred = p.inverse_transform(y_pred, lengths)
        print(y_pred[:5])

        oof_data_pred.extend([pred for line in y_pred for pred in line])
        print(oof_data_pred[:5])

    bacc = balanced_accuracy_score(oof_data, oof_data_pred)
    print("Final CV: ", bacc*100)
def test_inverse_transform(self):
    """Check that decoding the encoded fixture labels restores them.

    The fixture data is fitted and transformed in one step. The transformed
    features unpack into three parts, and the third one is passed to
    ``inverse_transform`` as its second argument alongside the encoded labels.
    """
    transformer = IndexTransformer()
    features, encoded_labels = transformer.fit_transform(self.x, self.y)

    # Only the last of the three feature components is used here.
    _, _, sequence_lengths = features

    decoded_labels = transformer.inverse_transform(encoded_labels, sequence_lengths)
    self.assertEqual(decoded_labels, self.y)
def test_inverse_transform(self):
    """Check that decoding the encoded fixture labels restores them.

    The fixture data is fitted and transformed in one step. The length of each
    fixture label sequence is passed to ``inverse_transform`` as its second
    argument alongside the encoded labels.
    """
    transformer = IndexTransformer()
    # The encoded features are not needed for the round trip.
    _features, encoded_labels = transformer.fit_transform(self.x, self.y)

    label_lengths = map(len, self.y)
    decoded_labels = transformer.inverse_transform(encoded_labels, label_lengths)

    self.assertEqual(decoded_labels, self.y)