def __init__(self, tweets=None):
    """Create the rule, lexicon and machine-learning classifiers.

    The machine-learning model is loaded from the pickle file
    ``<var.model_classifier>-model_python<major>.pkl`` when that file
    exists. Otherwise a new model is trained from ``tweets`` and pickled
    to that same path.

    Args:
        tweets: iterable of ``(tweet_message, label)`` pairs used as
            training data. It is only read when no pickled model is
            found. ``None`` (the default) means no training data.
    """
    # initialize internal variables
    self.rules_classifier = RulesClassifier()
    self.lexicon_classifier = LexiconClassifier()
    self.ml_classifier = None

    # Models are pickled per Python major version, so one name serves both
    # the load and the save path.
    python_version = str(sys.version_info[0])
    pickle_suffix = 'model_python' + python_version + '.pkl'
    model_name = str(var.model_classifier) + '-' + pickle_suffix

    # if the ML model has been generated, load the model from the pickle
    if os.path.exists(model_name):
        print('Reading the ' + str(var.model_classifier) +
              ' model from ' + pickle_suffix)
        # NOTE(security): unpickling executes code stored in the file; only
        # load model files that come from a trusted source.
        with open(model_name, 'rb') as model_file:
            self.ml_classifier = pickle.load(model_file)

    if self.ml_classifier is None:
        # Preprocess the data and train a new model
        print('Preprocessing the training data')
        # Materialize once: the pairs are traversed twice below, and a
        # fresh list avoids sharing a mutable default between calls.
        tweets = [] if tweets is None else list(tweets)
        tweet_messages = [message for message, _label in tweets]
        tweet_labels = [label for _message, label in tweets]

        # pre-process all the tweet messages in one call (tokenization,
        # POS and normalization, per the original comment)
        tweet_tokens = pre_process(tweet_messages)

        # Pair each pre-processed tweet with its label. Indexing (rather
        # than zip) keeps the IndexError if pre_process returns fewer
        # items than there are labels.
        trainset = [(tweet_tokens[i], label)
                    for i, label in enumerate(tweet_labels)]

        # initialize the classifier and train it
        classifier = MachineLearningClassifier(trainset)

        # dump the model into the pickle
        print('Saving the trained model at ' + model_name)
        with open(model_name, 'wb') as model_file:
            pickle.dump(classifier, model_file)

        self.ml_classifier = classifier
def __init__(self, trainset=None):
    """Create the rule, lexicon and machine-learning classifiers.

    Args:
        trainset: training data handed unchanged to
            ``MachineLearningClassifier``. ``None`` (the default) is
            replaced by a fresh empty list, so instances built without
            arguments never share one list object.
    """
    if trainset is None:
        trainset = []
    self.rules_classifier = RulesClassifier()
    self.lexicon_classifier = LexiconClassifier()
    self.ml_classifier = MachineLearningClassifier(trainset)
class TwitterHybridClassifier(object):
    """Classify a tweet as 'positive', 'negative' or 'neutral'.

    Three classifiers are applied in order (rules, lexicon, machine
    learning); the first one that reaches a decision determines the
    result.
    """

    # Lexicon scores at or beyond +/- this value are taken as a confident
    # positive/negative decision.
    LEXICON_CONFIDENT_SCORE = 3
    # Minimum machine-learning confidence needed to label a tweet positive
    # or negative instead of neutral.
    ML_MIN_CONFIDENCE = 0.3

    def __init__(self, trainset=None):
        """Create the rule, lexicon and machine-learning classifiers.

        Args:
            trainset: training data handed unchanged to
                ``MachineLearningClassifier``. ``None`` (the default) is
                replaced by a fresh empty list, so instances built without
                arguments never share one list object.
        """
        if trainset is None:
            trainset = []
        self.rules_classifier = RulesClassifier()
        self.lexicon_classifier = LexiconClassifier()
        self.ml_classifier = MachineLearningClassifier(trainset)

    # Apply the classifier over a tweet message in String format
    def classify(self, tweet_text):
        """Return the sentiment label for one tweet.

        Args:
            tweet_text: the tweet message; it is passed through
                ``pre_process`` before any classifier sees it.

        Returns:
            One of the strings 'positive', 'negative' or 'neutral'.
        """
        # 0. Pre-process the text (emoticons, misspellings, tagger)
        tweet_text = pre_process(tweet_text)

        # 1. Rule-based classifier (looks for emoticons, per the original
        #    comment). Any non-zero combined score decides the tweet here.
        # NOTE(review): the scores are summed, so negative_score is
        # presumably <= 0 -- confirm against RulesClassifier.
        positive_score, negative_score = self.rules_classifier.classify(
            tweet_text)
        rules_score = positive_score + negative_score
        if rules_score > 0:
            return 'positive'
        if rules_score < 0:
            return 'negative'

        # 2. Lexicon-based classifier. A combined score of exactly 0 is
        #    neutral; a score >= 3 is positive and <= -3 is negative.
        #    Anything in between falls through to the ML classifier.
        positive_score, negative_score = self.lexicon_classifier.classify(
            tweet_text)
        lexicon_score = positive_score + negative_score
        if lexicon_score == 0:
            return 'neutral'
        if lexicon_score >= self.LEXICON_CONFIDENT_SCORE:
            return 'positive'
        if lexicon_score <= -self.LEXICON_CONFIDENT_SCORE:
            return 'negative'

        # 3. Machine-learning classifier.
        # NOTE(review): entries 0 and 1 of the result are read as the
        # positive and negative confidences (second item of each entry);
        # confirm the ordering against MachineLearningClassifier. The
        # third (neutral) entry was never used in the decision and is no
        # longer read.
        scores = self.ml_classifier.classify(tweet_text)
        positive_conf = scores[0][1]
        negative_conf = scores[1][1]

        # Positive needs enough confidence AND must strictly beat the
        # negative confidence; a tie at or above the minimum resolves to
        # negative. Otherwise the tweet is neutral.
        if (positive_conf >= self.ML_MIN_CONFIDENCE and
                negative_conf < positive_conf):
            return 'positive'
        if negative_conf >= self.ML_MIN_CONFIDENCE:
            return 'negative'
        return 'neutral'
class TwitterHybridClassifier(object):
    """Classify a tweet as 'positive', 'negative' or 'neutral'.

    Three classifiers are applied in order (rules, lexicon, machine
    learning); the first one that reaches a decision determines the
    result.
    """

    # Lexicon scores at or beyond +/- this value are taken as a confident
    # positive/negative decision.
    LEXICON_CONFIDENT_SCORE = 3
    # Minimum machine-learning confidence needed to label a tweet positive
    # or negative instead of neutral.
    ML_MIN_CONFIDENCE = 0.3

    def __init__(self, trainset=None):
        """Create the rule, lexicon and machine-learning classifiers.

        Args:
            trainset: training data handed unchanged to
                ``MachineLearningClassifier``. ``None`` (the default) is
                replaced by a fresh empty list, so instances built without
                arguments never share one list object.
        """
        if trainset is None:
            trainset = []
        self.rules_classifier = RulesClassifier()
        self.lexicon_classifier = LexiconClassifier()
        self.ml_classifier = MachineLearningClassifier(trainset)

    # Apply the classifier over a tweet message in String format
    def classify(self, tweet_text):
        """Return the sentiment label for one tweet.

        Args:
            tweet_text: the tweet message; it is passed through
                ``pre_process`` before any classifier sees it.

        Returns:
            One of the strings 'positive', 'negative' or 'neutral'.
        """
        # 0. Pre-process the text (emoticons, misspellings, tagger)
        tweet_text = pre_process(tweet_text)

        # 1. Rule-based classifier (looks for emoticons, per the original
        #    comment). Any non-zero combined score decides the tweet here.
        # NOTE(review): the scores are summed, so negative_score is
        # presumably <= 0 -- confirm against RulesClassifier.
        positive_score, negative_score = self.rules_classifier.classify(
            tweet_text)
        rules_score = positive_score + negative_score
        if rules_score > 0:
            return 'positive'
        if rules_score < 0:
            return 'negative'

        # 2. Lexicon-based classifier. A combined score of exactly 0 is
        #    neutral; a score >= 3 is positive and <= -3 is negative.
        #    Anything in between falls through to the ML classifier.
        positive_score, negative_score = self.lexicon_classifier.classify(
            tweet_text)
        lexicon_score = positive_score + negative_score
        if lexicon_score == 0:
            return 'neutral'
        if lexicon_score >= self.LEXICON_CONFIDENT_SCORE:
            return 'positive'
        if lexicon_score <= -self.LEXICON_CONFIDENT_SCORE:
            return 'negative'

        # 3. Machine-learning classifier.
        # NOTE(review): entries 0 and 1 of the result are read as the
        # positive and negative confidences (second item of each entry);
        # confirm the ordering against MachineLearningClassifier. The
        # third (neutral) entry was never used in the decision and is no
        # longer read.
        scores = self.ml_classifier.classify(tweet_text)
        positive_conf = scores[0][1]
        negative_conf = scores[1][1]

        # Positive needs enough confidence AND must strictly beat the
        # negative confidence; a tie at or above the minimum resolves to
        # negative. Otherwise the tweet is neutral.
        if (positive_conf >= self.ML_MIN_CONFIDENCE and
                negative_conf < positive_conf):
            return 'positive'
        if negative_conf >= self.ML_MIN_CONFIDENCE:
            return 'negative'
        return 'neutral'