class ValidatorClass:
    """Validate the text and location attributes of a tweet."""

    def __init__(self, path_to_pickle_files):
        """
        Set up the banned-word list and the text classifier.

        :param path_to_pickle_files: value passed straight to TweetClassifier
            (presumably the directory holding its pickled models -- confirm
            against TweetClassifier)
        """
        # Substrings that disqualify a tweet; matched case-sensitively below.
        self.banned_word_list = ['rt ', 'https', 'jab']
        self.text_classifier = TweetClassifier(path_to_pickle_files)

    def validate_location(self, location):
        """
        Check that a location is neither None nor the string "None".

        :param location: string
        :return: True when the location is usable, False otherwise
        """
        return location is not None and location != 'None'

    def validate_text_from_tweet(self, text_from_tweet):
        """
        Validate the text of a tweet.

        The text is rejected when it is None, when it is an empty string, or
        when it contains any entry of ``self.banned_word_list`` as a
        (case-sensitive) substring. Otherwise the decision is delegated to the
        text classifier.

        :param text_from_tweet: string (or None)
        :return: False for rejected text, otherwise whatever
            ``self.text_classifier.sentiment`` returns for the text
        """
        # None is treated like a missing tweet instead of raising TypeError
        # in the substring test below (mirrors validate_location).
        if text_from_tweet is None or text_from_tweet == '':
            return False

        if any(banned_word in text_from_tweet
               for banned_word in self.banned_word_list):
            return False

        return self.text_classifier.sentiment(text_from_tweet)
# Report and persist the logistic-regression classifier.
# NOTE(review): LogisticRegression_classifier, training_set, testing_set and
# path_to_pickle_files are defined before this fragment -- not visible here.
print("LogisticRegression_classifier accuracy percent:",
      (nltk.classify.accuracy(LogisticRegression_classifier, testing_set)) * 100)
save_to_pickle_file(path_to_pickle_files, "LogisticRegression_classifier.pickle",
                    LogisticRegression_classifier)

# Each remaining classifier follows the same cycle:
# train on training_set, print accuracy on testing_set, save to a pickle file.
SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
SGDClassifier_classifier.train(training_set)
print("SGDClassifier_classifier accuracy percent:",
      (nltk.classify.accuracy(SGDClassifier_classifier, testing_set)) * 100)
save_to_pickle_file(path_to_pickle_files, "SGDClassifier_classifier.pickle",
                    SGDClassifier_classifier)

LinearSVC_classifier = SklearnClassifier(LinearSVC())
LinearSVC_classifier.train(training_set)
print("LinearSVC_classifier accuracy percent:",
      (nltk.classify.accuracy(LinearSVC_classifier, testing_set)) * 100)
save_to_pickle_file(path_to_pickle_files, "LinearSVC_classifier.pickle",
                    LinearSVC_classifier)

NuSVC_classifier = SklearnClassifier(NuSVC())
NuSVC_classifier.train(training_set)
print("NuSVC_classifier accuracy percent:",
      (nltk.classify.accuracy(NuSVC_classifier, testing_set)) * 100)
save_to_pickle_file(path_to_pickle_files, "NuSVC_classifier.pickle",
                    NuSVC_classifier)

# Build the voted classifier from the directory the pickles were just saved
# to, instead of a hard-coded "pickle_files/"; otherwise a different
# path_to_pickle_files would evaluate stale or missing models.
# NOTE(review): presumes TweetClassifier loads its models from this path.
voted_classifier = TweetClassifier(path_to_pickle_files)
print("voted_classifier accuracy percent:",
      (nltk.classify.accuracy(voted_classifier, testing_set)) * 100)
def __init__(self, path_to_pickle_files):
    """
    Initialise the banned-word list and the tweet text classifier.

    :param path_to_pickle_files: value handed unchanged to TweetClassifier
    """
    # Substrings stored for later banned-word checks.
    self.banned_word_list = [
        'rt ',
        'https',
        'jab',
    ]
    classifier = TweetClassifier(path_to_pickle_files)
    self.text_classifier = classifier