Пример #1
0
def supperclassify(train_set, train_label, test_set, test_label):
    '''Different methods'''
    train_voted = voting(train_set)
    aux = train_voted == train_label
    correct = sum(aux.astype(int))
    _accuracy = (correct * 100) / len(train_label)
    _precision, _recall, _f1score, _support = ut.get_measures_for_each_class(
        train_label, train_voted)
    print 'Estimator VOTING'
    print 'Average Accuracy:\t', _accuracy
    print 'Average Precision:\t', _precision
    print 'Average Recall:\t', _recall
    print 'Average F1 Measure:\t', _f1score
    print '\n'

    lambdas = weighted_voting_getlambdas(train_set, train_label)
    results = weighted_voting(test_set, lambdas)

    aux = results == test_label
    correct = sum(aux.astype(int))
    _accuracy = (correct * 100) / len(test_label)
    _precision, _recall, _f1score, _support = ut.get_measures_for_each_class(
        test_label, results)
    print 'Estimator W_VOTING'
    print 'Average Accuracy:\t', _accuracy
    print 'Average Precision:\t', _precision
    print 'Average Recall:\t', _recall
    print 'Average F1 Measure:\t', _f1score

    rf = clf.classifier_randomForest(train_set, train_label)
    results = clf.evaluateResults(rf,
                                  test_set,
                                  test_label,
                                  estimator_name='RF')

    lr = clf.logistic_regression(train_set, train_label)
    results = clf.evaluateResults(lr,
                                  test_set,
                                  test_label,
                                  estimator_name='LR')

    svm = clf.classifier_svm(train_set, train_label)
    results = clf.evaluateResults(svm,
                                  test_set,
                                  test_label,
                                  estimator_name='SVM')

    rbf = clf.rbf_classifier(train_set, train_label)
    results = clf.evaluateResults(rbf,
                                  test_set,
                                  test_label,
                                  estimator_name='RBF')
Пример #2
0
def supperclassify(train_set, train_label, test_set, test_label):
    '''Different methods'''
    train_voted = voting(train_set)
    aux = train_voted == train_label
    correct = sum(aux.astype(int))
    _accuracy = (correct * 100) / len(train_label)
    _precision, _recall, _f1score, _support = ut.get_measures_for_each_class(train_label, train_voted)
    print 'Estimator VOTING'
    print 'Average Accuracy:\t', _accuracy
    print 'Average Precision:\t', _precision
    print 'Average Recall:\t', _recall
    print 'Average F1 Measure:\t', _f1score
    print '\n'

    lambdas = weighted_voting_getlambdas(train_set, train_label)
    results = weighted_voting(test_set, lambdas)

    aux = results == test_label
    correct = sum(aux.astype(int))
    _accuracy = (correct * 100) / len(test_label)
    _precision, _recall, _f1score, _support = ut.get_measures_for_each_class(test_label, results)
    print 'Estimator W_VOTING'
    print 'Average Accuracy:\t', _accuracy
    print 'Average Precision:\t', _precision
    print 'Average Recall:\t', _recall
    print 'Average F1 Measure:\t', _f1score

    rf = clf.classifier_randomForest(train_set, train_label)
    results = clf.evaluateResults(rf, test_set, test_label, estimator_name='RF')

    lr = clf.logistic_regression(train_set, train_label)
    results = clf.evaluateResults(lr, test_set, test_label, estimator_name='LR')

    svm = clf.classifier_svm(train_set, train_label)
    results = clf.evaluateResults(svm, test_set, test_label, estimator_name='SVM')

    rbf = clf.rbf_classifier(train_set, train_label)
    results = clf.evaluateResults(rbf, test_set, test_label, estimator_name='RBF')
    # import pdb; pdb.set_trace()
Пример #3
0
        Training different classifiers.
        '''
        svm = clf.classifier_svm(tweets_features, train_labels)
        rf = clf.classifier_randomForest(tweets_features, train_labels)
        ada = clf.adaboost(tweets_features, train_labels)
        lr = clf.logistic_regression(tweets_features, train_labels)

        '''
        Test the different classifiers with the test tweets.
        '''

        pred = vectorizer.transform(test_tweets)
        pred = pred.toarray()

        _results, _accuracyLR, _precisionLR, _recallLR, _f_measureLR = clf.evaluateResults(lr, pred, test_labels,
                                                                                           estimator_name='Logistic regression',
                                                                                           file_name=results_folder)
        _results, _accuracyRF, _precisionRF, _recallRF, _f_measureRF = clf.evaluateResults(rf, pred, test_labels,
                                                                                           estimator_name='RF',
                                                                                           file_name=results_folder)

        _results, _accuracySVM, _precisionSVM, _recallSVM, _f_measureSVM = clf.evaluateResults(svm, pred, test_labels,
                                                                                               estimator_name='SVM',
                                                                                               file_name=results_folder)
        _results, _accuracyADA, _precisionADA, _recallADA, _f_measureADA = clf.evaluateResults(ada, pred, test_labels,
                                                                                               estimator_name='ADABOOST',
                                                                                               file_name=results_folder)

        accuracyLR.append(_accuracyLR)
        precisionLR.append(_precisionLR)
        recallLR.append(_recallLR)
Пример #4
0
    # oneVSall_rf = clf.onevsall(tweets_features, train_labels, estimator)
    '''
    Test the different classifiers with the test tweets.
    '''

    pred = vectorizer.transform(test_tweets)
    pred = pred.toarray()
    # pred = SelectKBest(chi2, k=4500).fit_transform(pred, test_labels)

    # evaluateResults(lda, pred, test_labels, estimator_name='LDA')

    # results, accuracyLR, precisionLR, recallLR, f_measureLR = clf.evaluateResults(lr, pred, test_labels,
    #                                                                           estimator_name='Logistic regression')
    # results, accuracyRF, precisionRF, recallRF, f_measureRF = clf.evaluateResults(forest, pred, test_labels,
    #                                                                           estimator_name='RF')
    results, accuracySVM, precisionSVM, recallSVM, f_measureSVM = clf.evaluateResults(
        svm, pred, test_labels, estimator_name='SVM')
    # results, accuracyADA, precisionADA, recallADA, f_measureADA = clf.evaluateResults(ada, pred, test_labels,
    #                                                                               estimator_name='ADABOOST')
    # results, accuracyMLP, precisionMLP, recallMLP, f_measureMLP = clf.evaluateResults(mlp, pred, test_labels,
    #                                                                               estimator_name='MLP')
    #
    # results, accuracyOVASVM, precisionOVASVM, recallOVASVM, f_measureOVASVM = clf.evaluateResults(oneVSall_svm, pred,
    #                                                                                           test_labels,
    #                                                                                           estimator_name='one versus all SVM')
    # # evaluateResults(oneVSall_mlp, pred, test_labels, estimator_name='one versus all MLP')
    # results, accuracyOVARF, precisionOVARF, recallOVARF, f_measureOVARF = clf.evaluateResults(oneVSall_rf, pred,
    #                                                                                       test_labels,
    #                                                                                       estimator_name='one versus all RF')

    print 'Accuracy SVM:\t', accuracySVM
    print 'Average Precision:\t', precisionSVM
Пример #5
0
    Test the different classifiers with the test tweets.
    '''

    pred = vectorizer.transform(test_tweets)
    pred = pred.toarray()
    # pred = SelectKBest(chi2, k=4500).fit_transform(pred, test_labels)


    # evaluateResults(lda, pred, test_labels, estimator_name='LDA')


    # results, accuracyLR, precisionLR, recallLR, f_measureLR = clf.evaluateResults(lr, pred, test_labels,
    #                                                                           estimator_name='Logistic regression')
    # results, accuracyRF, precisionRF, recallRF, f_measureRF = clf.evaluateResults(forest, pred, test_labels,
    #                                                                           estimator_name='RF')
    results, accuracySVM, precisionSVM, recallSVM, f_measureSVM = clf.evaluateResults(svm, pred, test_labels,
                                                                                      estimator_name='SVM')
    # results, accuracyADA, precisionADA, recallADA, f_measureADA = clf.evaluateResults(ada, pred, test_labels,
    #                                                                               estimator_name='ADABOOST')
    # results, accuracyMLP, precisionMLP, recallMLP, f_measureMLP = clf.evaluateResults(mlp, pred, test_labels,
    #                                                                               estimator_name='MLP')
    #
    # results, accuracyOVASVM, precisionOVASVM, recallOVASVM, f_measureOVASVM = clf.evaluateResults(oneVSall_svm, pred,
    #                                                                                           test_labels,
    #                                                                                           estimator_name='one versus all SVM')
    # # evaluateResults(oneVSall_mlp, pred, test_labels, estimator_name='one versus all MLP')
    # results, accuracyOVARF, precisionOVARF, recallOVARF, f_measureOVARF = clf.evaluateResults(oneVSall_rf, pred,
    #                                                                                       test_labels,
    #                                                                                       estimator_name='one versus all RF')

    print 'Accuracy SVM:\t', accuracySVM
    print 'Average Precision:\t', precisionSVM
Пример #6
0
        Training different classifiers.
        '''
        svm = clf.classifier_svm(tweets_features, train_labels)
        rf = clf.classifier_randomForest(tweets_features, train_labels)
        ada = clf.adaboost(tweets_features, train_labels)
        lr = clf.logistic_regression(tweets_features, train_labels)
        '''
        Test the different classifiers with the test tweets.
        '''

        pred = vectorizer.transform(test_tweets)
        pred = pred.toarray()

        _results, _accuracyLR, _precisionLR, _recallLR, _f_measureLR = clf.evaluateResults(
            lr,
            pred,
            test_labels,
            estimator_name='Logistic regression',
            file_name=results_folder)
        _results, _accuracyRF, _precisionRF, _recallRF, _f_measureRF = clf.evaluateResults(
            rf,
            pred,
            test_labels,
            estimator_name='RF',
            file_name=results_folder)

        _results, _accuracySVM, _precisionSVM, _recallSVM, _f_measureSVM = clf.evaluateResults(
            svm,
            pred,
            test_labels,
            estimator_name='SVM',
            file_name=results_folder)
Пример #7
0
        print '\nCreating Test set for super classifier ... '
        val_tweet_trans = vectorizer.transform(validation_tweets)
        val_tweet_trans = val_tweet_trans.toarray()

        test_results = clf.test_classifiers(val_tweet_trans, validation_labels,
                                            classifiers)
        '''
        Now we have train_results and test_results. Let's train and test a super classifier.
        '''
        print '\nTraining super classifier ... '
        super_clf = clf.rbf_classifier(train_results, test_labels)

        print '\nEvaluating Super classifier ... '
        results, accuracy, precision, recall, f_measure = clf.evaluateResults(
            super_clf,
            test_results,
            validation_labels,
            estimator_name='Supper Classifier')
        print '\n\nSuperClassify partition', j, '\n'
        diagnose.supperclassify(train_results, test_labels, test_results,
                                validation_labels)

    # np.savetxt("train_results_3.csv", train_results, delimiter=",")
    # np.savetxt("train_labels_3.csv", test_labels, delimiter=",")
    # np.savetxt("test_results_3.csv", test_results, delimiter=",")
    # np.savetxt("test_labels_3.csv", validation_labels, delimiter=",")

    # import pdb; pdb.set_trace()

    # printResults(accuracy, precision, recall, f_measure, name="SUPER CLASSIFICATOR")
Пример #8
0
        Train the super classifier on the test set
        '''
        print '\nCreating Test set for super classifier ... '
        val_tweet_trans = vectorizer.transform(validation_tweets)
        val_tweet_trans = val_tweet_trans.toarray()

        test_results = clf.test_classifiers(val_tweet_trans, validation_labels, classifiers)

        '''
        Now we have train_results and test_results. Let's train and test a super classifier.
        '''
        print '\nTraining super classifier ... '
        super_clf = clf.rbf_classifier(train_results, test_labels)

        print '\nEvaluating Super classifier ... '
        results, accuracy, precision, recall, f_measure = clf.evaluateResults(super_clf, test_results,
                                                                              validation_labels,
                                                                              estimator_name='Supper Classifier')
        print '\n\nSuperClassify partition', j, '\n'
        diagnose.supperclassify(train_results, test_labels, test_results, validation_labels)


    # np.savetxt("train_results_3.csv", train_results, delimiter=",")
    # np.savetxt("train_labels_3.csv", test_labels, delimiter=",")
    # np.savetxt("test_results_3.csv", test_results, delimiter=",")
    # np.savetxt("test_labels_3.csv", validation_labels, delimiter=",")

    # import pdb; pdb.set_trace()

    # printResults(accuracy, precision, recall, f_measure, name="SUPER CLASSIFICATOR")