def test_BayesianClassifier():
    """
    Fits a BayesianClassifier on "train.csv" and prints its score
    on "test.csv".

    The training features and labels come from process_train_data.
    The test file is read with pandas; its "id" and "Unnamed: 0" columns
    are discarded, the features are every remaining column except "label"
    and the targets are every remaining column except "tweet".
    The score is multiplied by 100 and printed; nothing is returned.
    """
    train_X, train_y = process_train_data("train.csv")

    classifier = BayesianClassifier()
    classifier.fit(train_X, train_y)

    # Load the evaluation set and strip the bookkeeping columns.
    raw_test = pd.read_csv("test.csv", encoding="utf8")
    test_data = raw_test.drop(columns=["id", "Unnamed: 0"])

    test_X = test_data.drop(columns="label")
    test_y = test_data.drop(columns="tweet")

    percent = classifier.score(test_X, test_y) * 100
    print("model score: ", percent, "%")
for index, i in data[['text']].iterrows(): t = i['text'].lower().translate( str.maketrans('', '', string.punctuation)).strip().split() for j in t: if j not in stop_words: lst_for_rep.append(j) data.at[index, 'text'] = lst_for_rep lst_for_rep = [] return data def read_stop_words(): """ Reads file and returns list with words from file. """ lst = [] with open('authors/stop_words.txt', 'r') as file: reader = csv.reader(file) for row in reader: lst += row return lst if __name__ == "__main__": train = process_data("authors/train.csv") test = process_data("authors/test.csv") classifier = BayesianClassifier() classifier.fit(train) print(f"Model score: {classifier.score(test)}%")