示例#1
0
    def __init__(self, tweets=None):
        """Set up the rule, lexicon and machine-learning classifiers.

        If a pickled machine-learning model already exists at
        ``<var.model_classifier>-model_python<major>.pkl`` it is loaded.
        Otherwise a new model is trained from ``tweets`` and pickled to that
        same path.

        Args:
            tweets: Sequence of ``(tweet_message, label)`` pairs used to
                train a new model. It is only read when no pickled model
                is found. Defaults to an empty training set.
        """
        # Use a None sentinel instead of a shared mutable ``[]`` default.
        if tweets is None:
            tweets = []

        # initialize internal variables
        self.rules_classifier = RulesClassifier()
        self.lexicon_classifier = LexiconClassifier()
        self.ml_classifier = None

        # The model file name carries the Python major version, so Python 2
        # and Python 3 each read and write their own pickle. The same name
        # is used for loading and for saving.
        python_version = sys.version_info[0]
        model_name = (str(var.model_classifier) + '-model_python' +
                      str(python_version) + '.pkl')

        # if the ML model has been generated, load the model from the pickle
        if os.path.exists(model_name):
            print('Reading the ' + str(var.model_classifier) +
                  ' model from model_python' + str(python_version) + '.pkl')
            # NOTE(security): unpickling can execute arbitrary code. Only
            # model files produced by this program should be placed at
            # this path.
            with open(model_name, 'rb') as model_file:
                self.ml_classifier = pickle.load(model_file)

        if self.ml_classifier is None:
            # Preprocess the data and train a new model
            print('Preprocessing the training data')
            tweet_messages = [message for message, _label in tweets]
            tweet_labels = [label for _message, label in tweets]

            # preprocess all the tweet messages in one call
            tweet_tokens = pre_process(tweet_messages)

            # Pair each label with the pre-processed tweet at the same
            # position. Indexing (rather than zip) keeps a too-short
            # pre_process result an error instead of silently dropping
            # training examples.
            trainset = [(tweet_tokens[i], label)
                        for i, label in enumerate(tweet_labels)]

            # initialize the classifier and train it
            classifier = MachineLearningClassifier(trainset)

            # dump the model into the pickle; the context manager makes sure
            # the file is flushed and closed before continuing
            print('Saving the trained model at ' + model_name)
            with open(model_name, 'wb') as model_file:
                pickle.dump(classifier, model_file)
            self.ml_classifier = classifier
 def __init__(self, trainset=None):
     """Create the rule, lexicon and machine-learning classifiers.

     Args:
         trainset: Training data handed to ``MachineLearningClassifier``.
             When omitted, a new empty list is created for this instance.
     """
     # A literal ``[]`` default is a single list object shared by every
     # call that omits the argument; since it is passed on to another
     # object, use a None sentinel and build a fresh list instead.
     if trainset is None:
         trainset = []
     self.rules_classifier = RulesClassifier()
     self.lexicon_classifier = LexiconClassifier()
     self.ml_classifier = MachineLearningClassifier(trainset)
 def __init__(self, trainset=None):
     """Build the three classifiers used by the hybrid pipeline.

     Args:
         trainset: Training data passed to ``MachineLearningClassifier``.
             ``None`` (the default) means an empty training set.
     """
     self.rules_classifier = RulesClassifier()
     self.lexicon_classifier = LexiconClassifier()
     # Create the empty list per call rather than as a default argument
     # value, so instances never share one list object.
     self.ml_classifier = MachineLearningClassifier(
         [] if trainset is None else trainset)
class TwitterHybridClassifier(object):
    """Three-stage sentiment classifier for tweet messages.

    A tweet is passed through a rule-based classifier, then a lexicon-based
    classifier and finally a machine-learning classifier. The first stage
    that reaches a decision determines the returned label.
    """

    def __init__(self, trainset=None):
        """Create the three underlying classifiers.

        Args:
            trainset: Training data handed to ``MachineLearningClassifier``.
                When omitted, a new empty list is used.
        """
        # Avoid a shared mutable ``[]`` default: the list is handed to
        # another object, so every instance gets its own.
        if trainset is None:
            trainset = []
        self.rules_classifier = RulesClassifier()
        self.lexicon_classifier = LexiconClassifier()
        self.ml_classifier = MachineLearningClassifier(trainset)

    # Apply the classifier over a tweet message in String format
    def classify(self, tweet_text):
        """Classify one tweet message.

        Args:
            tweet_text: The tweet message; it is run through
                ``pre_process`` before any classifier sees it.

        Returns:
            One of the strings ``'positive'``, ``'negative'`` or
            ``'neutral'``.
        """
        # 0. Pre-process the text (emoticons, misspellings, tagger)
        tweet_text = pre_process(tweet_text)

        # 1. Rule-based classifier. Look for emoticons basically.
        # The two scores are summed, so this presumably relies on
        # negative_score being reported as a non-positive number -- confirm
        # against RulesClassifier.
        positive_score, negative_score = self.rules_classifier.classify(
            tweet_text)
        rules_score = positive_score + negative_score

        # If any rule fired (non-zero total), classify the tweet here.
        # Otherwise continue with the lexicon classifier.
        if rules_score != 0:
            return 'positive' if rules_score > 0 else 'negative'

        # 2. Lexicon-based classifier
        positive_score, negative_score = self.lexicon_classifier.classify(
            tweet_text)
        lexicon_score = positive_score + negative_score

        # A lexicon score of exactly 0 is neutral, >= 3 is positive and
        # <= -3 is negative. Anything in between is left to the
        # machine-learning classifier.
        if lexicon_score == 0:
            return 'neutral'
        if lexicon_score >= 3:
            return 'positive'
        if lexicon_score <= -3:
            return 'negative'

        # 3. Machine-learning classifier. Only the positive and negative
        # confidences are consulted; scores[i][1] is read as the confidence
        # of entry i (presumably ordered positive, negative, neutral --
        # confirm against MachineLearningClassifier).
        scores = self.ml_classifier.classify(tweet_text)
        positive_conf = scores[0][1]
        negative_conf = scores[1][1]

        # Positive wins when its confidence is >= 0.3 and strictly greater
        # than the negative one; otherwise negative wins when its
        # confidence is >= 0.3; otherwise the tweet is neutral.
        if positive_conf >= 0.3 and negative_conf < positive_conf:
            return 'positive'
        if negative_conf >= 0.3:
            return 'negative'
        return 'neutral'
class TwitterHybridClassifier(object):
    """Hybrid tweet sentiment classifier (rules, lexicon, machine learning).

    The stages run in that order and the first one that reaches a decision
    determines the label. The decision thresholds are class attributes so a
    subclass or an instance can override them without editing ``classify``.
    """

    # Absolute lexicon score from which the lexicon stage decides on its own.
    LEXICON_CONFIDENT_SCORE = 3
    # Minimum machine-learning confidence needed to label a tweet
    # positive or negative.
    ML_MIN_CONFIDENCE = 0.3

    def __init__(self, trainset=None):
        """Instantiate the rule, lexicon and machine-learning classifiers.

        Args:
            trainset: Training data passed to ``MachineLearningClassifier``.
                ``None`` (the default) means an empty training set.
        """
        self.rules_classifier = RulesClassifier()
        self.lexicon_classifier = LexiconClassifier()
        # Build the empty list per instance instead of using a mutable
        # default argument that all instances would share.
        self.ml_classifier = MachineLearningClassifier(
            [] if trainset is None else trainset)

    # Apply the classifier over a tweet message in String format
    def classify(self, tweet_text):
        """Return the sentiment label for one tweet message.

        Args:
            tweet_text: The tweet message; it is passed through
                ``pre_process`` first.

        Returns:
            ``'positive'``, ``'negative'`` or ``'neutral'``.
        """
        # 0. Pre-process the text (emoticons, misspellings, tagger)
        tokens = pre_process(tweet_text)

        # 1. Rule-based classifier (emoticons, basically). A non-zero total
        # means at least one rule fired, and its sign gives the label.
        rules_positive, rules_negative = self.rules_classifier.classify(
            tokens)
        rules_score = rules_positive + rules_negative
        if rules_score != 0:
            if rules_score > 0:
                return 'positive'
            return 'negative'

        # 2. Lexicon-based classifier. It decides when the total is exactly
        # 0 (neutral) or at least LEXICON_CONFIDENT_SCORE away from 0 in
        # either direction. Weaker signals fall through to stage 3.
        lexicon_positive, lexicon_negative = self.lexicon_classifier.classify(
            tokens)
        lexicon_score = lexicon_positive + lexicon_negative
        if lexicon_score == 0:
            return 'neutral'
        if lexicon_score >= self.LEXICON_CONFIDENT_SCORE:
            return 'positive'
        if lexicon_score <= -self.LEXICON_CONFIDENT_SCORE:
            return 'negative'

        # 3. Machine-learning classifier. scores[0][1] and scores[1][1] are
        # read as the positive and negative confidences (presumably entries
        # are (label, confidence) pairs -- confirm against
        # MachineLearningClassifier). The neutral entry is not consulted.
        scores = self.ml_classifier.classify(tokens)
        positive_conf = scores[0][1]
        negative_conf = scores[1][1]

        # Positive needs enough confidence and must strictly beat negative.
        # Negative only needs enough confidence. Everything else is neutral.
        if (positive_conf >= self.ML_MIN_CONFIDENCE
                and negative_conf < positive_conf):
            return 'positive'
        if negative_conf >= self.ML_MIN_CONFIDENCE:
            return 'negative'
        return 'neutral'