def execute_demo(language):
    """Run both baseline tasks for one language: binary classification on the
    dev split, then probabilistic regression scored by MSE."""
    data = Dataset(language)
    print("{}: {} training - {} Test\n".format(language.upper(), len(data.trainset), len(data.devset)))

    # --- Binary classification task ---
    classifier = Baseline(language, type='classify')
    classifier.train(data.trainset)
    class_predictions = classifier.test(data.devset)
    class_gold = [sent['gold_label'] for sent in data.devset]
    report_score(class_gold, class_predictions, detailed=True)

    # --- Regression (probabilistic) task ---
    regressor = Baseline(language, type='regression')
    regressor.train(data.trainset)
    prob_predictions = regressor.test(data.devset)
    prob_gold = [float(sent['gold_prob']) for sent in data.devset]
    print("Probabilistic classification task:\nMSE:", mean_squared_error(prob_gold, prob_predictions), "\n\n")
def __init__(self, df, scoring: str = "accuracy"):
    """Set up a grid-search configuration on top of the Baseline.

    Stores the data frame and scoring metric, defines the random-forest
    hyper-parameter grid, and creates the estimator with a fixed seed.
    """
    Baseline.__init__(self, df)
    self.df = df
    self.scoring = scoring
    # Search space explored for the random forest.
    self.params = {
        "n_estimators": [10, 20, 30, 40, 50, 100],
        "criterion": ["gini", "entropy"],
        "max_depth": [2, 5, 10],
        "min_samples_leaf": [1, 10],
        # 'max_features': np.arange(0.1, 1, 0.1).tolist()
    }
    # Fixed random_state keeps grid-search results reproducible.
    self.estimator = RandomForestClassifier(random_state=42)
def execute_demo(language):
    """Train the baseline with the pre-extracted feature set and score it on
    the test split."""
    data = Dataset(language)
    model = Baseline(language)
    # Features are computed on the Dataset object and passed to the trainer.
    model.train(
        data.trainset,
        data.unigram,
        data.suffix,
        data.char_trigram,
        data.pos,
        data.dep,
        data.shape,
        data.frequency,
    )
    predicted = model.test(data.testset)
    expected = [sent['gold_label'] for sent in data.testset]
    report_score(expected, predicted)
def execute_demo(language):
    """Train the baseline on the training split and report dev-set scores."""
    data = Dataset(language)
    print("{}: {} training - {} dev".format(language, len(data.trainset), len(data.devset)))
    model = Baseline(language)
    model.train(data.trainset)
    expected = [sent['gold_label'] for sent in data.devset]
    report_score(expected, model.test(data.devset))
def execute_demo(language):
    """Train the embedding-based Model for *language* and report dev scores.

    Loads the pre-trained word embeddings for the language, trains the Model
    on the training split, and scores its predictions against the dev split.
    """
    # The original special-cased 'english' and 'spanish' with identical calls
    # and left word_emb unbound for any other language (UnboundLocalError at
    # model.train); loading directly by language name covers both and more.
    word_emb = load_word_embeddings(language)
    data = Dataset(language)
    print("{}: {} training - {} dev".format(language, len(data.trainset), len(data.devset)))
    # gold_label is 0 if the target word is not complex, 1 if it is complex.
    # (A Baseline instance was constructed here originally but never used.)
    model = Model(language)
    model.train(data.trainset, word_emb)
    predictions = model.test(data.devset, word_emb)
    gold_labels = [sent['gold_label'] for sent in data.devset]
    report_score(gold_labels, predictions)
def execute_demo(language):
    """Run the baseline for one language, previewing the first predictions."""
    data = Dataset(language)
    print("{}: {} training - {} dev".format(language, len(data.trainset), len(data.devset)))
    model = Baseline(language)
    model.train(data.trainset)
    predicted = model.test(data.devset)
    # Quick sanity check: show a prefix of the raw predictions.
    print(predicted[:50])
    expected = [sent['gold_label'] for sent in data.devset]
    report_score(expected, predicted)
def execute_demo(language, flag):
    """Train the baseline and evaluate it on a single split.

    flag == 0 evaluates on the dev split, flag == 1 on the test split.
    Any other flag value trains the model but reports nothing, matching the
    original behavior.
    """
    data = Dataset(language)
    if flag == 0:
        # data.trainset is the training portion of the Dataset; the dev split
        # is used for evaluation in this mode.
        print("{}: {} training - {} dev".format(language, len(data.trainset), len(data.devset)))
    if flag == 1:
        print("{}: {} training - {} test".format(language, len(data.trainset), len(data.testset)))

    baseline = Baseline(language)
    baseline.train(data.trainset)

    # Only predict on the split that is actually reported. The original
    # predicted on BOTH dev and test regardless of `flag`, doubling the work
    # without changing the output.
    if flag == 0:
        predictions = baseline.test(data.devset)
        # gold labels are binary: 0 / 1 per target word
        gold_labels = [sent['gold_label'] for sent in data.devset]
        print("Test by using dev set:")
        report_score(gold_labels, predictions)
    if flag == 1:
        predictions = baseline.test(data.testset)
        gold_labels = [sent['gold_label'] for sent in data.testset]
        print("Test by using test set:")
        report_score(gold_labels, predictions)
def execute_demo(language, algor):
    """Train a baseline whose frequency/POS lookups cover train and test data,
    then score it on the test split."""
    data = Dataset(language)
    print("{}: {} training - {} dev".format(language, len(data.trainset), len(data.testset)))
    model = Baseline(language, algor)
    # Build the lookup tables over both splits so test-time words are covered.
    combined = data.trainset + data.testset
    freqs = model.freqdict(combined)
    pos_index = model.posdict(combined)
    model.train(data.trainset, freqs, pos_index)
    predicted = model.test(data.testset, freqs, pos_index)
    expected = [sent['gold_label'] for sent in data.testset]
    report_score(expected, predicted)
def execute_demo(language, amountdata=100):
    """Train baseline and improved models, report dev/test scores, and return
    per-example tuples for error analysis.

    Returns (results, results2): dev-set tuples filtered to the examples BOTH
    models got wrong, and the unfiltered test-set tuples. Each tuple is
    (baseline_pred, improved_pred, gold_label, target_word).
    """
    data = Dataset(language, amountdata)
    print("{}: {} training - {} dev".format(language, len(data.trainset), len(data.devset)))

    print('\nInitialising')
    base_model = Baseline(language)
    imp_model = Improved(language)

    print('Training')
    base_model.train(data.trainset)
    imp_model.train(data.trainset)

    print('Predicting')
    base_dev = base_model.test(data.devset)
    imp_dev = imp_model.test(data.devset)
    dev_gold = [sent['gold_label'] for sent in data.devset]
    dev_targets = [sent['target_word'] for sent in data.devset]

    print("\nScore for baseline:")
    report_score(dev_gold, base_dev)
    print("Score for improved model:")
    report_score(dev_gold, imp_dev)

    print('Predicting on testset')
    base_test = base_model.test(data.testset)
    imp_test = imp_model.test(data.testset)
    test_gold = [sent['gold_label'] for sent in data.testset]
    test_targets = [sent['target_word'] for sent in data.testset]

    print("\nScore for baseline:")
    report_score(test_gold, base_test)
    print("Score for improved model:")
    report_score(test_gold, imp_test)

    # Dev-set tuples, kept only where BOTH models disagree with the gold label.
    results = [
        (b, i, g, t)
        for b, i, g, t in zip(base_dev, imp_dev, dev_gold, dev_targets)
        if b != g and i != g
    ]
    # Test-set tuples are returned unfiltered.
    results2 = list(zip(base_test, imp_test, test_gold, test_targets))
    return results, results2
def execute_demo(language):
    """Score the fine-tuned baseline on the dev split, then report the final
    held-out test-set scores."""
    data = Dataset(language)
    print("{}: {} training - {} dev - {} test".format(
        language, len(data.trainset), len(data.devset), len(data.testset)))

    model = Baseline(language)
    model.train(data.trainset)

    # Dev-set check of the fine-tuned model.
    dev_predicted = model.test(data.devset)
    dev_expected = [sent['gold_label'] for sent in data.devset]
    print("Fine-tuned Score - Dev Set")
    report_score(dev_expected, dev_predicted, detailed=True)

    # Final held-out evaluation.
    test_predicted = model.test(data.testset)
    test_expected = [sent['gold_label'] for sent in data.testset]
    print("Final Score - Test Set")
    report_score(test_expected, test_predicted, detailed=True)
def execute_demo(language):
    """Evaluate feature-based and word2vec-based models on the dev split.

    Each model family's ``test`` returns a list of (model_name, label_list)
    pairs. Every model is scored individually, then by hard (majority) voting
    within its family, and finally by hard voting across both families.
    """

    def _to_int(labels):
        # '0'/'1' string labels -> ints, so the vectors can be averaged.
        return [int(v) for v in labels]

    def _hard_vote(int_preds):
        # Majority vote: average per-model label vectors, round, stringify.
        averaged = np.mean(np.array(int_preds), axis=0).tolist()
        return [str(round(v)) for v in averaged]

    data = Dataset(language)
    print("{}: {} training - {} dev".format(language, len(data.trainset), len(data.devset)))

    print('Feature based models')
    baseline = Baseline(language)
    print('Training models')
    baseline.train(data.trainset)
    print('Predicting labels')
    predictions = baseline.test(data.devset)
    # The original converted labels with three near-identical nested loops;
    # the shared _to_int helper replaces all of them.
    predictions_int = [_to_int(pred[1]) for pred in predictions]

    gold_labels = [sent['gold_label'] for sent in data.devset]

    print('Calculating scores')
    for pred in predictions:
        print('Scores for', pred[0])
        report_score(gold_labels, pred[1])
    print('Scores for hard voting with all models')
    report_score(gold_labels, _hard_vote(predictions_int))

    # Word2vec based models
    print('Word2vec based models')
    print('Loading w2v')
    w2v = Word2vec(language)
    print('Training models')
    w2v.train(data.trainset)
    print('Predicting labels')
    predictions_w2v = w2v.test(data.devset)
    predictions_w2v_int = [_to_int(pred[1]) for pred in predictions_w2v]

    print('Calculating scores')
    for pred in predictions_w2v:
        print('Scores for', pred[0])
        report_score(gold_labels, pred[1])
    print('Scores for hard voting with all models')
    report_score(gold_labels, _hard_vote(predictions_w2v_int))

    # Combine both families for the overall vote. The original appended the
    # feature-model vectors into predictions_w2v_int in place; a fresh list
    # avoids mutating data that was already used above. np.mean is
    # order-independent, so the result is identical.
    combined_int = predictions_w2v_int + predictions_int
    print('Scores for hard voting with both types of models')
    report_score(gold_labels, _hard_vote(combined_int))
# Script fragment: runs the plain baseline and two grid-search scorings
# (accuracy and ROC) over the shared `df` data frame.
# NOTE(review): this snippet is truncated mid-statement at the end — the
# `roc` tuple unpacking is never closed; the remainder is outside this view.
from utils.config import df
from utils.baseline import Baseline
from utils.scoring import Scoring
from utils.validation import (
    crosstabing,
    cls_report,
    roc_curve_test,
    mat_cor,
    conf_matrix,
    heatmap_conf,
)

# baseline
baseline = Baseline(df)
baseline_output = baseline.run()

# Accuracy
acc_scoring = Scoring(df, scoring="accuracy")
(
    acc_scoring_output,
    prediction_accuracy,
    X_test_acc,
    y_true_acc,
    best_estimator_acc,
) = acc_scoring.run_own()

# Roc
roc_scoring = Scoring(df, scoring="roc")
(
    roc_scoring_output,
    prediction_roc,
    X_test_roc,
    y_true_roc,
def execute_demo(language):
    """Train the baseline and advanced models, report scores, and optionally
    print feature importances and an error-analysis breakdown.

    ``importances`` and ``debug`` are hand-toggled switches kept from the
    original; both default to off, so those branches normally do not run.
    """
    data = Dataset(language)
    # Evaluate on the dev split; swap to data.testset for final scoring.
    test_data = data.devset
    print("{}: {} training - {} test".format(language, len(data.trainset), len(test_data)))

    baseline = Baseline(language)
    advanced = Advanced(language)
    models_to_run = [baseline, advanced]

    model_mistakes = {}
    gold_labels = [sent['gold_label'] for sent in test_data]
    # Sentences/targets are kept for the error-analysis printout below.
    sentences = [sent['sentence'] for sent in test_data]
    targets = [sent['target_word'] for sent in test_data]
    model_predictions = {}
    debug = False

    for model in models_to_run:
        # Fix: the original called model.train(data.trainset) twice per model,
        # training everything a redundant second time. Train once and keep the
        # return value for the importance inspection below.
        trained = model.train(data.trainset)

        importances = False  # flip to True to print RFC feature importances
        if importances == True:
            # Only the English advanced model uses a RandomForestClassifier,
            # so feature_importances_ exists only there.
            if language == 'english' and model == advanced:
                importances = trained.feature_importances_
                ordered_feature_list = model.ordered_feature_list
                indices = np.argsort(importances)[::-1]
                # Emit LaTeX table rows for the 20 most important features.
                for f in range(20):
                    print("{}. & {} & ({:0.3}) \\\\ \hline".format(f+1, ordered_feature_list[indices[f]], importances[indices[f]]))

        predictions = model.test(test_data)
        model_predictions[model.name] = predictions
        print(model.name)
        report_score(gold_labels, predictions)

        if debug == True:
            # Record which models misclassified each of the first 500 examples.
            look_at = 500
            for sent_i in range(look_at):
                if predictions[sent_i] != gold_labels[sent_i]:
                    if sent_i in model_mistakes:
                        model_mistakes[sent_i].append(model.name)
                    else:
                        model_mistakes[sent_i] = [model.name]
                else:
                    if sent_i not in model_mistakes:
                        model_mistakes[sent_i] = []

    if debug == True:
        # Partition examples by which models got them right.
        both_right = []
        advanced_right = []
        baseline_right = []
        both_wrong = []
        for key, value in model_mistakes.items():
            if len(value) == 2:
                both_wrong.append(key)
            elif len(value) == 0:
                both_right.append(key)
            elif value[0] == 'Baseline':
                advanced_right.append(key)
            else:
                baseline_right.append(key)

        # Print up to max_wrong examples from each agreement bucket.
        max_wrong = 10
        for perm in [both_right, both_wrong, advanced_right, baseline_right]:
            curr_wrong = 0
            for item in perm:
                if curr_wrong == max_wrong:
                    break
                curr_wrong += 1
                sent = sentences[item]
                target = targets[item]
                gold = gold_labels[item]
                if perm == advanced_right:
                    predict = model_predictions['Advanced'][item]
                else:
                    predict = model_predictions['Baseline'][item]
                if perm == advanced_right:
                    perm_name = 'Advanced Correct, Baseline Incorrect'
                elif perm == baseline_right:
                    perm_name = 'Advanced Incorrect, Baseline Correct'
                elif perm == both_right:
                    perm_name = 'Both Correct'
                else:
                    perm_name = 'Both Incorrect'
                print("{}:\n Sent: {}\n Target: {}\n Predicted: {}\n Gold: {}\n".format(perm_name, sent, target, predict, gold))