def classify_comment(self, comment, classifier_type='SVM', no_classes=5):
    """Classify the sentiment of a single comment.

    The comment is run through ``Preprocessing.preprocessing``, converted to
    a feature vector and passed to the classifier chosen by
    ``classifier_type``:

    * ``'SVM'`` -- ``morphosyntactic_vector`` + ``classifier_svm``
    * ``'MNB'`` -- ``bigram_vector`` + ``classifier_mnb``

    Args:
        comment: The comment to classify.
        classifier_type: ``'SVM'`` (default) or ``'MNB'``.
        no_classes: ``5`` returns the classifier's label untouched; ``3``
            folds ``u'muy_positivo'`` into ``u'positivo'`` and
            ``u'muy_negativo'`` into ``u'negativo'``, leaving any other
            label as it is.

    Returns:
        The (possibly folded) label, or ``None`` when ``classifier_type`` or
        ``no_classes`` is not one of the supported values.
    """
    preprocessor = Preprocessing()
    vectorizer = VectorQuantization()

    # Both classifiers share the same pipeline; only the feature builder and
    # the trained classifier differ, so pick them once up front.
    if classifier_type == 'SVM':
        build_vector = vectorizer.morphosyntactic_vector
        classifier = classifier_svm
    elif classifier_type == 'MNB':
        build_vector = vectorizer.bigram_vector
        classifier = classifier_mnb
    else:
        # Unknown classifier: keep the historical "no result" behaviour.
        return None

    preprocessed_comment = preprocessor.preprocessing(comment)
    sentiment = classifier.classify(build_vector(preprocessed_comment))

    if no_classes == 5:
        return sentiment
    if no_classes == 3:
        if sentiment in (u'positivo', u'muy_positivo'):
            return u'positivo'
        if sentiment in (u'negativo', u'muy_negativo'):
            return u'negativo'
        return sentiment
    # Unsupported number of classes: keep the historical "no result" behaviour.
    return None
def create(self, corpus_path, model_type="morphosyntactic"):
    """Build a model from a labelled corpus file.

    The corpus is read as UTF-8, one document per line. Each line is split
    on the ``'/|/'`` separator: field 1 is the comment text and field 2 is
    its category (everything up to the first newline). Field 0 is not used.

    Args:
        corpus_path: Path of the corpus file to read.
        model_type: ``"morphosyntactic"`` (default) or ``"bigram"``; selects
            which ``VectorQuantization`` method builds the comment vectors.
            Any other value prints a notice once and falls back to
            ``"morphosyntactic"``.

    Returns:
        A ``pattern_Model`` built with ``weight=None`` from one
        ``pattern_Document`` per corpus line whose vector is non-empty.

    Raises:
        IndexError: If a non-blank line has fewer than three
            ``'/|/'``-separated fields.
    """
    preprocessor = Preprocessing()
    vectorizer = VectorQuantization()

    # The vector builder does not depend on the line being read, so resolve
    # it once instead of re-deciding (and re-printing) for every line.
    if model_type == "bigram":
        build_vector = vectorizer.bigram_vector
    else:
        if model_type != "morphosyntactic":
            # Single-argument parenthesised form prints the same text under
            # both the Python 2 print statement and the print function.
            print("No model defined using default: morphosyntactic")
        build_vector = vectorizer.morphosyntactic_vector

    document_list = []
    with codecs.open(corpus_path, 'r', 'utf-8') as corpus:
        for line in corpus:
            # Blank lines (e.g. a trailing one) carry no document; skipping
            # them avoids an IndexError on the field lookups below.
            if not line.strip():
                continue
            fields = line.split('/|/')
            comment = preprocessor.preprocessing(fields[1])
            category = fields[2].split('\n')[0]
            comment_vector = build_vector(comment)
            # Comments that produce an empty vector are left out of the model.
            if comment_vector:
                document_list.append(
                    pattern_Document(comment_vector, type=category))

    return pattern_Model(documents=document_list, weight=None)