Пример #1
0
    def start_naivebayes_classification(self, array, mode, ngrambow, minborder, maxborder, nr, tuplebows):
        """Run the Naive Bayes evaluation on ``array`` and return its results.

        A ``Start_NB`` learner is built from ``array``, ``mode``, this
        object's ``tweet_class``, ``tuplebows``, ``ngrambow`` and the
        ``CROSS_VALIDATION`` setting; its ``start_naivebayes_evaluation`` is
        then called with ``mode``, ``minborder``, ``maxborder`` and ``nr``.

        Returns whatever ``start_naivebayes_evaluation`` returns (per the
        original note: a ``(resultscores_tuple, N.A., N.A.)`` triple --
        TODO confirm against ``Start_NB``).
        """
        learner = Start_NB(
            array,
            mode,
            self.tweet_class,
            True,
            tuplebows,
            ngrambow,
            self.CROSS_VALIDATION,
        )
        return learner.start_naivebayes_evaluation(mode, minborder, maxborder, nr)
Пример #2
0
    def get_activity_tweets(self, inputfile, mode, ngrambow, nr, loadclassifier):
        """Classify the not-yet-labelled tweets in ``inputfile`` and write the result.

        Steps:
          1. Build the classifier object(s) selected by ``mode`` on the input
             returned by ``setup_input_classification``.
          2. Read ``inputfile`` (delimited by ``self.DELIMITER``) and collect
             column 3 of every row whose column 5 is empty and whose column 0
             consists only of digits (presumably: tweet text, label and tweet
             id respectively -- TODO confirm against the file layout).
          3. Preprocess those tweets and classify the preprocessed array.
          4. Report class counts via ``count_classes`` and write the
             predictions to ``<inputfile up to its first dot>_class.csv``.

        Args:
            inputfile: path of the delimited tweet file to classify.
            mode: must contain ``"svm"`` and/or ``"nb"``; when both are
                present both classifiers run and the Naive Bayes prediction
                is the one that is kept.
            ngrambow: passed through to ``setup_input_classification`` and
                ``Start_NB``.
            nr: passed through to ``setup_input_classification``.
            loadclassifier: passed through to ``start_classification``.

        Raises:
            ValueError: if ``mode`` selects neither ``"svm"`` nor ``"nb"``.
        """
        use_svm = "svm" in mode
        use_nb = "nb" in mode
        # Fail before any expensive work: without a classifier there would be
        # no prediction to report and the method could only crash at the end.
        if not (use_svm or use_nb):
            raise ValueError("mode must contain 'svm' and/or 'nb', got %r" % (mode,))

        # Create classifier on trainingdata of class
        (array, tuplebows) = self.setup_input_classification(mode, ngrambow, 0, 0, nr)
        if use_svm:
            svmObject = Start_SVM(array, mode, self.tweet_class, True, tuplebows, self.CROSS_VALIDATION)
        if use_nb:
            nbObject = Start_NB(array, mode, self.tweet_class, True, tuplebows, ngrambow, self.CROSS_VALIDATION)

        # Get tweets of new data; the context manager closes the file again.
        new_tweets = {}
        with open(inputfile, "rU") as handle:
            for row in csv.reader(handle, delimiter=self.DELIMITER):
                if row[5] == "" and row[0].isdigit():
                    # Keys are consecutive integers starting at 0.
                    new_tweets[len(new_tweets)] = row[3]

        # Preprocess new data
        # NOTE(review): split(".")[0] cuts at the FIRST dot of the whole path,
        # so a path such as "./data/x.csv" yields "" -- confirm callers only
        # pass paths with a single dot before switching to os.path.splitext.
        inputfile_filename = inputfile.split(".")[0]
        self.preprocess_tweets(mode, new_tweets, inputfile_filename)
        array = self.get_preprocessed_array(mode)

        # Classify newdata (the NB result overrides the SVM one if both ran)
        if use_svm:
            prediction = svmObject.start_classification(mode, array, loadclassifier, 0.001, 10)
        if use_nb:
            prediction = nbObject.start_classification(mode, array, False, loadclassifier)

        # Print to file
        self.count_classes(prediction.tolist())
        classification_filename = inputfile_filename + "_class.csv"
        helpers.write_classification_to_tweetfile(prediction, 0, 5, inputfile, classification_filename)
Пример #3
0
    def analysis_classification(self, mode, ngrambow, nr, loadclassifier):
        """Classify the tweets of the training file and write the predictions.

        Steps:
          1. Read ``self.TRAININGFILE`` (tab-delimited), skipping its first
             row, and collect column 3 of every remaining row.
          2. Build the classifier object(s) selected by ``mode`` on the input
             returned by ``setup_input_classification``.
          3. Preprocess ``self.tweets`` and classify the preprocessed array.
          4. Report class counts via ``count_classes`` and write the
             predictions to ``<TRAININGFILE up to its first dot>_class.csv``.

        Args:
            mode: must contain ``"svm"`` and/or ``"nb"``; when both are
                present both classifiers run and the Naive Bayes prediction
                is the one that is kept.
            ngrambow: passed through to ``setup_input_classification`` and
                ``Start_NB``.
            nr: passed through to ``setup_input_classification``.
            loadclassifier: passed through to ``start_classification``.

        Raises:
            ValueError: if ``mode`` selects neither ``"svm"`` nor ``"nb"``.
        """
        use_svm = "svm" in mode
        use_nb = "nb" in mode
        # Fail before any expensive work: without a classifier there would be
        # no prediction to report and the method could only crash at the end.
        if not (use_svm or use_nb):
            raise ValueError("mode must contain 'svm' and/or 'nb', got %r" % (mode,))

        DELIMITER = "\t"

        # Get tweets; the context manager closes the file again.
        # NOTE(review): all_tweets is collected here but never used below --
        # preprocessing is given self.tweets instead. Confirm which of the two
        # is intended before removing this read or switching the argument.
        all_tweets = {}
        with open(self.TRAININGFILE, "rU") as handle:
            rows = csv.reader(handle, delimiter=DELIMITER)
            next(rows, None)  # skip the first row (presumably the header)
            for row in rows:
                # Keys are consecutive integers starting at 0.
                all_tweets[len(all_tweets)] = row[3]

        # Create classifier on trainingdata of class
        (array, tuplebows) = self.setup_input_classification(mode, ngrambow, 0, 0, nr)
        if use_svm:
            svmObject = Start_SVM(array, mode, self.tweet_class, True, tuplebows, self.CROSS_VALIDATION)
        if use_nb:
            nbObject = Start_NB(array, mode, self.tweet_class, True, tuplebows, ngrambow, self.CROSS_VALIDATION)

        # Parenthesised so the line prints the same text under Python 2 and
        # also parses under Python 3.
        print("preprocess new data")
        # Preprocess new data
        training_filename = self.TRAININGFILE.split(".")[0]
        self.preprocess_tweets(mode, self.tweets, training_filename)
        array = self.get_preprocessed_array(mode)

        # Classify tweets (the NB result overrides the SVM one if both ran)
        if use_svm:
            prediction = svmObject.start_classification(mode, array, loadclassifier, 0.001, 10)
        if use_nb:
            prediction = nbObject.start_classification(mode, array, False, loadclassifier)

        self.count_classes(prediction.tolist())
        classification_filename = training_filename + "_class.csv"
        helpers.write_classification_to_tweetfile(prediction, 1, 7, self.TRAININGFILE, classification_filename)