示例#1
0
    def classify(self, input_text=None):
        """Train a Naive Bayes classifier on the corpora and classify a text.

        For every entry of ``self.languages`` the corpus found under
        ``self.configs['dirs']['CORPUS_DIR']/<language>`` is read, filtered
        and labelled with that language.  A classifier is trained on the
        combined samples and then applied to the features extracted from
        the text to classify.

        Args:
            input_text: Text to classify.  When omitted (or falsy),
                ``self.input_text`` is classified instead.

        Returns:
            The label predicted by the trained classifier; the training
            samples are labelled with the entries of ``self.languages``.
        """
        training_text = []

        for language in self.languages:
            corpusdir = os.path.join(self.configs['dirs']['CORPUS_DIR'],
                                     language)
            # Built once per language so the membership test below does not
            # rescan a list for every token.
            stopwrds = set(stopwords.words(language))
            # NOTE(review): the stopword test is applied to the token as
            # read, before it is lower-cased -- confirm this is intended.
            training_text.extend([(w.lower(), language)
                                  for w in get_corpus(corpusdir)
                                  if len(w) >= 2 and w not in stopwrds])

        # Kept ahead of feature extraction; presumably self.extract reads
        # self.word_features -- verify against its definition.
        self.word_features = self.word_feats(self.get_words_in(training_text))

        training_set = nltk.classify.apply_features(self.extract,
                                                    training_text)
        # Train once; a second pass over the same training set adds nothing.
        classifier = nltk.NaiveBayesClassifier.train(training_set)

        # Honour the caller-supplied text, falling back to the instance's own.
        text = input_text if input_text else self.input_text
        prediction = classifier.classify(self.extract(text))

        return prediction
示例#2
0
def main():
    snow_queen = get_corpus(
        corpusdir="/Users/spiridoulaoregan/nltk_data/test_data",
        filename="snow_queen_german.txt")
    #root_dir, input_text, config_dirs
    oc = OracleClassifier(root_dir="/home/roulaoregan/algo/nltk_data",
                          input_text=snow_queen,
                          config_dirs=os.path.join(os.getcwd(), "configs",
                                                   "dirs.json"))
    print "classified: ", oc.classify()
示例#3
0
	def classify(self, input_text=None):
		"""Train a Naive Bayes classifier on the corpora and classify a text.

		For every entry of ``self.languages`` the corpus found under
		``self.configs['dirs']['CORPUS_DIR']/<language>`` is read, filtered
		and labelled with that language.  A classifier is trained on the
		combined samples and then applied to the features extracted from
		the text to classify.

		Args:
			input_text: Text to classify.  When omitted (or falsy),
				``self.input_text`` is classified instead.

		Returns:
			The label predicted by the trained classifier; the training
			samples are labelled with the entries of ``self.languages``.
		"""
		training_text = []

		for language in self.languages:
			corpusdir = os.path.join(self.configs['dirs']['CORPUS_DIR'], language)
			# Built once per language so the membership test below does not
			# rescan a list for every token.
			stopwrds = set(stopwords.words(language))
			# NOTE(review): the stopword test is applied to the token as
			# read, before it is lower-cased -- confirm this is intended.
			training_text.extend([(w.lower(), language) for w in get_corpus(corpusdir) if len(w) >= 2 and w not in stopwrds])

		# Kept ahead of feature extraction; presumably self.extract reads
		# self.word_features -- verify against its definition.
		self.word_features = self.word_feats(self.get_words_in(training_text))

		training_set = nltk.classify.apply_features(self.extract, training_text)
		# Train once; a second pass over the same training set adds nothing.
		classifier = nltk.NaiveBayesClassifier.train(training_set)

		# Honour the caller-supplied text, falling back to the instance's own.
		text = input_text if input_text else self.input_text
		prediction = classifier.classify(self.extract(text))

		return prediction
示例#4
0
def main():
	snow_queen = get_corpus(corpusdir="/Users/spiridoulaoregan/nltk_data/test_data", filename="snow_queen_german.txt")
	#root_dir, input_text, config_dirs
	oc = OracleClassifier(root_dir="/home/roulaoregan/algo/nltk_data", input_text=snow_queen, config_dirs=os.path.join(os.getcwd(), "configs", "dirs.json"))
	print  "classified: ", oc.classify()