def run_multiple_voting():
    """Compare several vote-weighting schemes on stored classifier predictions.

    Loads the predictions saved for the two held-out test sets, builds a
    per-classifier confusion matrix from the first set, then weights the
    second set's votes under four schemes, reports metrics, and stores a
    confusion matrix per scheme. To be used in conjunction with
    alternative_main to determine which weighting method performs better.

    Returns:
        tuple: (votes_p, votes_CEN_p, votes_CEN, votes_eq) — predicted
        labels for test set 2 under each weighting scheme, in that order.
    """
    # Stored predictions plus gold labels for each held-out test set.
    preds1, actual1 = load_preds(1)
    preds2, actual2 = load_preds(2)

    # One confusion matrix per base classifier, computed on test set 1.
    classifier_cms = [
        ConfusionMatrix(actual1, preds1[0], "Bayes"),
        ConfusionMatrix(actual1, preds1[1], "Proximity"),
        ConfusionMatrix(actual1, preds1[2], "Voting"),
    ]
    for matrix in classifier_cms:
        matrix.store_cm()
    print("Individual confusion matrices created and stored!")

    # Weight the second set of results using the confusion matrices
    # derived from the first set (only the first two classifiers vote).
    weight_pairs = [
        [classifier_cms[0], preds2[0]],
        [classifier_cms[1], preds2[1]],
    ]

    # (scheme name passed to voting(), label used when storing its matrix)
    schemes = [
        ("Precision", "Precision"),
        ("CEN_Precision", "CEN_Precision"),
        ("CEN", "CEN"),
        ("Equal_Vote", "Equal"),
    ]

    # Weighted votes for every scheme, in declaration order.
    all_votes = [voting(weight_pairs, scheme) for scheme, _ in schemes]

    # Report per-scheme metrics against the test-set-2 gold labels.
    for votes in all_votes:
        print(classification_report(actual2, votes))

    # Build and persist one final confusion matrix per scheme.
    for (_, label), votes in zip(schemes, all_votes):
        ConfusionMatrix(actual2, votes, label).store_cm()

    return tuple(all_votes)
def test_weighting():
    """Smoke-test weighting.py with four randomly-guessing classifiers."""
    # Fixed gold labels over three classes for a 12-instance toy set.
    gold = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]

    # Four fake classifiers: random predictions paired with their
    # confusion matrices (the [cm, predictions] shape voting() expects).
    weight_pairs = []
    for idx in range(1, 5):
        random_preds = np.random.randint(3, size=12)
        weight_pairs.append(
            [ConfusionMatrix(gold, random_preds, f"cls_{idx}"), random_preds]
        )

    # Final votes under each weighting scheme, in a fixed order.
    schemes = ["Precision", "CEN_Precision", "CEN", "Equal_Vote"]
    votes_by_scheme = [voting(weight_pairs, scheme) for scheme in schemes]

    # Report metrics for every scheme.
    for votes in votes_by_scheme:
        print(classification_report(gold, votes))

    # Build and persist a confusion matrix for each scheme's final votes.
    labels = ["Precision_Voting", "CEN_Precision_Voting", "CEN_Voting", "Equal_Voting"]
    for votes, label in zip(votes_by_scheme, labels):
        ConfusionMatrix(gold, votes, label).store_cm()
def main(tperc, seed, fpaths, weighting_type):
    """Parses files, trains the models, tests the models, creates the weights, makes predictions and evaluates results

    Args:
        tperc: training split proportion, forwarded to splitSets.
        seed: RNG seed, forwarded to splitSets.
        fpaths: paths of the input files to parse.
        weighting_type: one of "Precision", "CEN", "CEN_Precision";
            any other value falls through to the equal-vote result.

    NOTE(review): only the LSTM is run here — the Bayes and Voting models
    and all store_preds() calls are commented out, yet run_multiple_voting()
    below loads predictions via load_preds(). This therefore relies on
    prediction files stored by a previous run — confirm before use.
    """
    # Parse the raw files into classifiable instances and split them
    # into one training set and two held-out test sets.
    files = openFiles(fpaths)
    instances = parseFiles(files)
    train_set, test_set1, test_set2 = splitSets(tperc, seed, instances)

    # Initialize all models
    #b = BayesEliminationModel()
    #v = VotingModel()
    # print("Initialized all models!")
    # # # Train all models
    # # print("Training Bayes...")
    # #b.train(train_set)
    # print("Training Voting...")
    # #v.train(train_set)
    # # print("Trained all models!")

    # Run models and store first set of results
    print("We have a total of", len(test_set1), len(test_set2), "tweets")
    #b_pred = b.batchTest(test_set1)
    r_pred = runLSTM(test_set1)
    #v_pred = v.batchTest(test_set1)
    print(r_pred)
    print("Predictions made for first test set!")

    # Store first set of predictions
    #preds1 = [b_pred, r_pred, v_pred]
    test_set1_labels = [i.getLabel() for i in test_set1]
    #store_preds(preds1, test_set1_labels, 1)
    print("Stored predictions for first test set!")

    # Run models and store second set of results
    #b_pred2 = b.batchTest(test_set2)
    # NOTE(review): r_pred2 is computed but never used below — the weighted
    # votes come from run_multiple_voting(), which reads stored predictions.
    r_pred2 = runLSTM(test_set2)
    #v_pred2 = v.batchTest(test_set2)
    print("Predictions made for second test set!")

    # Store second set of predictions
    #preds2 = [b_pred2, r_pred2, v_pred2]
    test_set2_labels = [i.getLabel() for i in test_set2]
    #store_preds(preds2, test_set2_labels, 2)
    print("Stored predictions for second test set!")

    # Get weighted votes under every scheme, then select the requested one.
    votes_p, votes_CEN_p, votes_CEN, votes_eq = run_multiple_voting()
    if weighting_type == "Precision":
        guesses = votes_p
    elif weighting_type == "CEN":
        guesses = votes_CEN
    elif weighting_type == "CEN_Precision":
        guesses = votes_CEN_p
    else:
        # Fallback: any unrecognized weighting_type gets the equal vote.
        guesses = votes_eq

    # Create confusion matrix for final model and store it in a file
    final_cm = ConfusionMatrix(test_set2_labels, guesses, "Final_Model_" + weighting_type)
    final_cm.store_cm()
    print("Stored confusion matrix!")

    # Store second set of tweets and guesses
    test_set2_tweets = [t.getFullTweet() for t in test_set2]
    store_new_labels(test_set2_tweets, guesses, test_set2_labels)
    print("Stored new predictions!")
# NOTE(review): this second definition of main() shadows the LSTM-based
# main() defined above — only this version is callable at import time.
def main(tperc, seed, fpaths, weighting_type="Equal_Vote"):
    """Parses files, trains the models, tests the models, creates the weights, makes predictions and evaluates results

    Args:
        tperc: training split proportion, forwarded to splitSets.
        seed: RNG seed, forwarded to splitSets.
        fpaths: paths of the input files to parse.
        weighting_type: weighting scheme name forwarded to voting().
            Defaults to "Equal_Vote".

    Fix: weighting_type was referenced but never defined in this version
    (the 3-argument signature), so the voting() call raised NameError.
    It is now a keyword parameter with a default, keeping old call sites
    working.
    """
    # Parse the raw files into instances and split them into a training
    # set and two held-out test sets.
    files = openFiles(fpaths)
    instances = parseFiles(files)
    train_set, test_set1, test_set2 = splitSets(tperc, seed, instances)

    # Initialize all models
    b = BayesEliminationModel()
    p = ProximityModel()
    print("Initialized all models!")

    # Train all models
    p.train(train_set)
    b.train(train_set)
    print("Trained all models!")

    # Run models and store first set of results
    p_pred = p.batchTest(test_set1)
    b_pred = b.batchTest(test_set1)
    print("Predictions made for first test set!")

    # Store first set of predictions
    preds1 = [p_pred, b_pred]
    test_set1_labels = [i.getLabel() for i in test_set1]
    store_preds(preds1, test_set1_labels, 1)
    print("Stored predictions for first test set!")

    # Get confusion matrices for first set of results
    p_cm = ConfusionMatrix(test_set1_labels, p_pred, "Proximity")
    b_cm = ConfusionMatrix(test_set1_labels, b_pred, "Bayes")
    confusionMatrices = [p_cm, b_cm]

    # Save individual confusion matrices to files
    for cm in confusionMatrices:
        cm.store_cm()
    print("Individual confusion matrices created and stored!")

    # Second set of predictions
    p_pred2 = p.batchTest(test_set2)
    b_pred2 = b.batchTest(test_set2)
    print("Predictions made for second test set!")

    # Store second set of predictions
    preds2 = [p_pred2, b_pred2]
    test_set2_labels = [i.getLabel() for i in test_set2]
    store_preds(preds2, test_set2_labels, 2)
    print("Stored predictions for second test set!")

    # Weight second set of results, using confusion matrices from first set
    weightingInput = [[confusionMatrices[0], p_pred2], [confusionMatrices[1], b_pred2]]

    # Get the weighting results
    guesses = voting(weightingInput, weighting_type)
    print("Voting done!")
    # print(guesses)

    # Create confusion matrix for final model and store it in a file
    final_cm = ConfusionMatrix(test_set2_labels, guesses, "Final_Model_" + weighting_type)
    final_cm.store_cm()
    print("Stored confusion matrix!")

    # Store second set of tweets and guesses
    test_set2_tweets = [t.getFullTweet() for t in test_set2]
    store_new_labels(test_set2_tweets, guesses, test_set2_labels)
    print("Stored new predictions!")