Пример #1
0
	def classify_comment(self, comment, classifier_type='SVM', no_classes=5):
		"""Classify a comment and return its sentiment label.

		The comment is passed through ``Preprocessing.preprocessing``,
		turned into a vector and handed to one of the classifiers that
		are defined outside this method:

		* ``'SVM'`` -- ``morphosyntactic_vector`` + ``classifier_svm``
		* ``'MNB'`` -- ``bigram_vector`` + ``classifier_mnb``

		Args:
			comment: The raw comment given to the preprocessor.
			classifier_type: ``'SVM'`` (default) or ``'MNB'``.
			no_classes: ``5`` returns the classifier's label unchanged.
				``3`` folds ``u'muy_positivo'`` into ``u'positivo'`` and
				``u'muy_negativo'`` into ``u'negativo'``; any other label
				is returned as is.

		Returns:
			The sentiment label, or ``None`` when ``classifier_type`` or
			``no_classes`` is not one of the supported values.
		"""
		preprocessor = Preprocessing()
		vectorizer = VectorQuantization()

		# Each classifier is paired with the vectorization it is fed with;
		# everything after this selection is shared by both classifiers.
		if classifier_type == 'SVM':
			vectorize = vectorizer.morphosyntactic_vector
			classifier = classifier_svm
		elif classifier_type == 'MNB':
			vectorize = vectorizer.bigram_vector
			classifier = classifier_mnb
		else:
			# Unsupported classifier: nothing is preprocessed or classified.
			return None

		preprocessed_comment = preprocessor.preprocessing(comment)
		sentiment = classifier.classify(vectorize(preprocessed_comment))

		if no_classes == 5:
			return sentiment
		if no_classes == 3:
			# Collapse the two "muy_*" labels into their plain counterparts.
			if sentiment in (u'positivo', u'muy_positivo'):
				return u'positivo'
			if sentiment in (u'negativo', u'muy_negativo'):
				return u'negativo'
			return sentiment
		# Unsupported number of classes: the comment has already been
		# classified at this point, but no label is reported.
		return None
Пример #2
0
	def create(self, corpus_path, model_type="morphosyntactic"):
		"""Build a model from a labelled corpus file.

		The corpus is read as UTF-8, one document per line, with fields
		separated by ``/|/``. The second field is the comment text and the
		third field is its category; the first field is not used here.
		Each comment is preprocessed and vectorized, and every non-empty
		vector becomes a ``pattern_Document`` typed with its category.

		Args:
			corpus_path: Path of the corpus file.
			model_type: ``"morphosyntactic"`` (default) or ``"bigram"``.
				Any other value prints a warning and falls back to the
				morphosyntactic vectorization.

		Returns:
			A ``pattern_Model`` built from the collected documents with
			``weight=None``.

		Raises:
			IndexError: If a non-blank line has fewer than three
				``/|/``-separated fields.
		"""
		preprocessor = Preprocessing()
		vectorizer = VectorQuantization()

		# The vectorization does not depend on the line being read, so it is
		# chosen once here; the fallback warning is therefore printed a
		# single time instead of once per corpus line.
		if model_type == "bigram":
			vectorize = vectorizer.bigram_vector
		else:
			if model_type != "morphosyntactic":
				# Single-argument call form prints the same text on
				# Python 2 and Python 3.
				print("No model defined using default: morphosyntactic")
			vectorize = vectorizer.morphosyntactic_vector

		document_list = []
		with codecs.open(corpus_path, 'r', 'utf-8') as corpus:
			for line in corpus:
				# Blank lines (e.g. a trailing empty line) carry no document.
				if not line.strip():
					continue
				fields = line.split('/|/')
				comment = preprocessor.preprocessing(fields[1])
				# codecs.open does not translate newlines, so strip '\r' as
				# well as '\n' to keep labels clean for CRLF corpora.
				category = fields[2].rstrip('\r\n')
				comment_vector = vectorize(comment)
				if comment_vector:
					document_list.append(
						pattern_Document(comment_vector, type=category))
		model = pattern_Model(documents=document_list, weight=None)
		return model