from classification import train_classifier, test_classifier, cross_validation
from sklearn.ensemble import AdaBoostClassifier


def main():
    """Run the AdaBoost experiment with ``select_features=True``.

    Builds a default ``AdaBoostClassifier`` and hands it, in order, to the
    project's ``train_classifier``, ``test_classifier`` and
    ``cross_validation`` helpers, printing a separator line before the
    cross-validation step.
    """
    # A single flag feeds all three calls so they cannot drift apart.
    select_features = True
    classifier = AdaBoostClassifier()

    train_classifier(classifier, select_features=select_features)
    test_classifier(classifier, select_features=select_features)
    # Call form instead of the Python 2 print statement: valid syntax on
    # Python 3, and on Python 2 it still prints the bare string.
    print('------------------')
    cross_validation(classifier, select_features=select_features)


if __name__ == '__main__':
    main()
from classification import train_classifier, test_classifier, cross_validation
from sklearn.tree import DecisionTreeClassifier


def main():
    """Run the decision-tree experiment with ``select_features=False``.

    Builds a default ``DecisionTreeClassifier`` and hands it, in order, to
    the project's ``train_classifier``, ``test_classifier`` and
    ``cross_validation`` helpers, printing a separator line before the
    cross-validation step.
    """
    # A single flag feeds all three calls so they cannot drift apart.
    select_features = False
    classifier = DecisionTreeClassifier()

    train_classifier(classifier, select_features=select_features)
    test_classifier(classifier, select_features=select_features)
    # Call form instead of the Python 2 print statement: valid syntax on
    # Python 3, and on Python 2 it still prints the bare string.
    print('------------------')
    cross_validation(classifier, select_features=select_features)


if __name__ == '__main__':
    main()
print('Lexical features added\n')


def _pos_ratio_features(dataset):
    """Return the flattened POS tag-ratio vector computed for *dataset*."""
    return flatten(pos_features.get_tag_ratio_vector(dataset))


def _assemble_features(sentiment, rating, punctuations, pos_ratios):
    """Join the feature lists and pass them to ``classification.get_feature_vector``.

    *sentiment* and *rating* become the first two entries; the
    *punctuations* and *pos_ratios* lists are appended after them.
    """
    combined = [sentiment, rating] + punctuations + pos_ratios
    return classification.get_feature_vector(combined)


# POS features
print('Preparing pos tag ratio vectors')
sarc_train_pos_ratios = _pos_ratio_features(sarc_train)
sarc_test_pos_ratios = _pos_ratio_features(sarc_test)
reg_train_pos_ratios = _pos_ratio_features(reg_train)
reg_test_pos_ratios = _pos_ratio_features(reg_test)
print('Parts of speech features added')

# Classification
print()
print('Preparing feature vectors of individual dataset')
sarc_train_features = _assemble_features(
    sarc_train_sent,
    sarc_train_rating,
    sarc_train_punctuations,
    sarc_train_pos_ratios,
)
sarc_test_features = _assemble_features(
    sarc_test_sent,
    sarc_test_rating,
    sarc_test_punctuations,
    sarc_test_pos_ratios,
)
reg_train_features = _assemble_features(
    reg_train_sent,
    reg_train_rating,
    reg_train_punctuations,
    reg_train_pos_ratios,
)
reg_test_features = _assemble_features(
    reg_test_sent,
    reg_test_rating,
    reg_test_punctuations,
    reg_test_pos_ratios,
)

# Fit on the two training sets, then evaluate on the two held-out sets.
print('Training and testing classifier')
classifier = classification.get_classifier(
    sarc_train_features,
    reg_train_features,
)
classification.test_classifier(
    classifier,
    sarc_test_features,
    reg_test_features,
)

# End of code
print('The end!')

# Sample review record with a numeric rating and free-text body.
# NOTE(review): "lound" looks like a typo for "loud"; left as-is because the
# text is runtime data -- confirm before changing.
review = {
    'rating': 1.0,
    'review': 'I recently purchased this speaker and I want to tell you that it is totally worth the 1000 bucks!!! The voice is so clear and lound that I can hear voices even from other galaxies!',
}