Exemplo n.º 1
0
 def get_correctly_classified_tweets(self, tweets_and_sentiment):
     """
     Classify the given (tweet, sentiment) pairs and return only the ones
     the trained estimator labels correctly.

     Parameters:
         tweets_and_sentiment: non-empty iterable of
             (tweet, sentimentvalues) pairs. Note that ``zip(*...)``
             raises on an empty input.

     Returns:
         (correct_tweets, correct_sentimentvalues): parallel lists of the
         tweets whose predicted subjectivity matched their target, and the
         sentiment values associated with those tweets.
     """
     tweets, sentimentvalues = zip(*tweets_and_sentiment)
     # NOTE(review): zip(*...) never yields None, so this guard is always
     # true on this path; kept defensively and fixed to use identity
     # comparison ("is not None") instead of "!= None".
     if sentimentvalues is not None:
         self.test_words_and_values = sentimentvalues
     count_vector = self.vect.transform([t.text for t in tweets])
     tfidf_count = self.tfidf_transformer.transform(count_vector)
     if self.only_text_features:
         combined_vector = tfidf_count
     else:
         # Augment the tf-idf text features with the hand-crafted
         # per-tweet feature dicts, then stack both sparse matrices.
         dict_vector = self.dict_vectorizer.transform(
             [features.get_feature_set(t, self.featureset, v)
              for t, v in zip(tweets, self.test_words_and_values)])
         tfidf_dict = self.dict_transformer.transform(dict_vector)
         combined_vector = sp.hstack([tfidf_count, tfidf_dict])
     predictions = self.best_estimator.predict(combined_vector)
     # NOTE(review): assumes make_subjectivity_targets preserves the order
     # and length of `tweets`, so predictions stay aligned with targets
     # and sentimentvalues -- TODO confirm against utils implementation.
     tweets, targets = utils.make_subjectivity_targets(tweets)
     # Keep only the tweets where the target matches the prediction.
     correct_tweets = []
     correct_sentimentvalues = []
     for prediction, target, tweet, sentiment in zip(
             predictions, targets, tweets, sentimentvalues):
         if prediction == target:
             correct_tweets.append(tweet)
             correct_sentimentvalues.append(sentiment)
     return correct_tweets, correct_sentimentvalues
Exemplo n.º 2
0
def get_optimal_subjectivity_classifier():
    """
    Build, train and return the optimal subjectivity classifier.

    Loads pickled tweet set 3, derives subjectivity targets for it, and
    trains an SVM on the 'SA' feature set using fixed count-vectorizer
    and tf-idf options.
    """
    raw_tweets = utils.get_pickles(3)
    labeled_tweets, targets = utils.make_subjectivity_targets(raw_tweets)
    # Fixed hyper-parameters for the text vectorization pipeline.
    vect_options = {'ngram_range': (1, 1), 'max_df': 0.5}
    tfidf_options = {'sublinear_tf': False, 'use_idf': True, 'smooth_idf': True}
    classifier = SVM(labeled_tweets, targets, vect_options, tfidf_options)
    classifier.set_feature_set('SA', utils.get_sentimentvalues(3))
    classifier.train_on_feature_set()
    return classifier