def start_naivebayes_classification(self, array, mode, ngrambow, minborder, maxborder, nr, tuplebows):
    """Run the Naive Bayes evaluation for the given input data.

    Builds a ``Start_NB`` object from ``array``, ``mode``, the instance's
    ``tweet_class``, ``tuplebows``, ``ngrambow`` and the instance's
    ``CROSS_VALIDATION`` setting, then delegates to its
    ``start_naivebayes_evaluation`` method.

    Returns:
        Whatever ``start_naivebayes_evaluation(mode, minborder, maxborder,
        nr)`` returns; according to the original note this is a tuple of the
        form ``(resultscores_tuple, N.A., N.A.)``.
    """
    classifier = Start_NB(
        array,
        mode,
        self.tweet_class,
        True,
        tuplebows,
        ngrambow,
        self.CROSS_VALIDATION,
    )
    return classifier.start_naivebayes_evaluation(mode, minborder, maxborder, nr)
def get_activity_tweets(self, inputfile, mode, ngrambow, nr, loadclassifier):
    """Classify the selected tweets of ``inputfile`` and write the result to a new file.

    Steps:
      1. Set up the classifier object(s) selected by ``mode`` from the data
         returned by ``self.setup_input_classification``.
      2. Collect the text (column 3) of every row in ``inputfile`` whose
         column 5 is empty and whose column 0 consists of digits only
         (presumably: rows with a numeric id and no class label yet --
         confirm against the file format).
      3. Preprocess those tweets and classify them.
      4. Count the predicted classes and write them to
         ``<inputfile up to its first '.'>_class.csv``.

    Args:
        inputfile: Path of the delimited tweet file to classify.
        mode: Classifier selection; must contain "svm" and/or "nb". When
            both are present the Naive Bayes prediction is the one used.
        ngrambow: Passed through to ``setup_input_classification`` and
            ``Start_NB``.
        nr: Passed through to ``setup_input_classification``.
        loadclassifier: Passed through to ``start_classification``.

    Raises:
        ValueError: If ``mode`` selects neither "svm" nor "nb".
    """
    use_svm = "svm" in mode
    use_nb = "nb" in mode
    if not (use_svm or use_nb):
        # Without this check the method would train and preprocess first and
        # only then crash on the unbound ``prediction`` variable.
        raise ValueError("mode must contain 'svm' or 'nb', got %r" % (mode,))

    # Create classifier on training data of class
    (array, tuplebows) = self.setup_input_classification(mode, ngrambow, 0, 0, nr)
    if use_svm:
        svmObject = Start_SVM(array, mode, self.tweet_class, True, tuplebows, self.CROSS_VALIDATION)
    if use_nb:
        nbObject = Start_NB(array, mode, self.tweet_class, True, tuplebows, ngrambow, self.CROSS_VALIDATION)

    # Get tweets of new data; the ``with`` block closes the file handle.
    new_tweets = {}
    with open(inputfile, "rU") as handle:
        for row in csv.reader(handle, delimiter=self.DELIMITER):
            # Blank or short rows cannot be candidates; skip them instead of
            # failing with an IndexError on row[5].
            if len(row) > 5 and row[5] == "" and row[0].isdigit():
                # Keys are consecutive integers starting at 0.
                new_tweets[len(new_tweets)] = row[3]

    # Preprocess new data
    # NOTE(review): everything after the first "." is dropped, so a path with
    # a dot in a directory name is truncated there -- confirm this is intended.
    inputfile_filename = inputfile.split(".")[0]
    self.preprocess_tweets(mode, new_tweets, inputfile_filename)
    new_array = self.get_preprocessed_array(mode)

    # Classify new data
    if use_svm:
        prediction = svmObject.start_classification(mode, new_array, loadclassifier, 0.001, 10)
    if use_nb:
        prediction = nbObject.start_classification(mode, new_array, False, loadclassifier)

    # Print to file
    self.count_classes(prediction.tolist())
    classification_filename = inputfile_filename + "_class.csv"
    helpers.write_classification_to_tweetfile(prediction, 0, 5, inputfile, classification_filename)
def analysis_classification(self, mode, ngrambow, nr, loadclassifier):
    """Classify the training/test tweets and write the predictions next to the training file.

    Steps:
      1. Read column 3 of every row after the first one of
         ``self.TRAININGFILE`` (tab-delimited).
      2. Set up the classifier object(s) selected by ``mode`` from the data
         returned by ``self.setup_input_classification``.
      3. Preprocess ``self.tweets`` and classify the resulting array.
      4. Count the predicted classes and write them to
         ``<TRAININGFILE up to its first '.'>_class.csv``.

    Args:
        mode: Classifier selection; must contain "svm" and/or "nb". When
            both are present the Naive Bayes prediction is the one used.
        ngrambow: Passed through to ``setup_input_classification`` and
            ``Start_NB``.
        nr: Passed through to ``setup_input_classification``.
        loadclassifier: Passed through to ``start_classification``.

    Raises:
        ValueError: If ``mode`` selects neither "svm" nor "nb".
    """
    use_svm = "svm" in mode
    use_nb = "nb" in mode
    if not (use_svm or use_nb):
        # Without this check the method would train and preprocess first and
        # only then crash on the unbound ``prediction`` variable.
        raise ValueError("mode must contain 'svm' or 'nb', got %r" % (mode,))

    delimiter = "\t"

    # Get tweets; the ``with`` block closes the file handle.
    all_tweets = {}
    with open(self.TRAININGFILE, "rU") as handle:
        data = csv.reader(handle, delimiter=delimiter)
        next(data, None)  # skip the first row (presumably a header)
        for index, row in enumerate(data):
            all_tweets[index] = row[3]
    # NOTE(review): ``all_tweets`` is never used below -- ``self.tweets`` is
    # what gets preprocessed and classified. If the predictions are meant to
    # line up with the rows of TRAININGFILE, ``all_tweets`` may have been the
    # intended argument; confirm before changing.

    # Create classifier on training data of class
    (array, tuplebows) = self.setup_input_classification(mode, ngrambow, 0, 0, nr)
    if use_svm:
        svmObject = Start_SVM(array, mode, self.tweet_class, True, tuplebows, self.CROSS_VALIDATION)
    if use_nb:
        nbObject = Start_NB(array, mode, self.tweet_class, True, tuplebows, ngrambow, self.CROSS_VALIDATION)

    print("preprocess new data")

    # Preprocess new data
    training_filename = self.TRAININGFILE.split(".")[0]
    self.preprocess_tweets(mode, self.tweets, training_filename)
    new_array = self.get_preprocessed_array(mode)

    # Classify tweets
    if use_svm:
        prediction = svmObject.start_classification(mode, new_array, loadclassifier, 0.001, 10)
    if use_nb:
        prediction = nbObject.start_classification(mode, new_array, False, loadclassifier)

    self.count_classes(prediction.tolist())
    classification_filename = training_filename + "_class.csv"
    helpers.write_classification_to_tweetfile(prediction, 1, 7, self.TRAININGFILE, classification_filename)