Example #1
def supperclassify(train_set, train_label, test_set, test_label):
    '''Compare majority voting, weighted voting and several individual
    classifiers (RF, LR, SVM, RBF) on the given train/test split.'''
    train_voted = voting(train_set)
    aux = train_voted == train_label
    correct = sum(aux.astype(int))
    _accuracy = (correct * 100) / len(train_label)
    _precision, _recall, _f1score, _support = ut.get_measures_for_each_class(
        train_label, train_voted)
    print 'Estimator VOTING'
    print 'Average Accuracy:\t', _accuracy
    print 'Average Precision:\t', _precision
    print 'Average Recall:\t', _recall
    print 'Average F1 Measure:\t', _f1score
    print '\n'

    lambdas = weighted_voting_getlambdas(train_set, train_label)
    results = weighted_voting(test_set, lambdas)

    aux = results == test_label
    correct = sum(aux.astype(int))
    _accuracy = (correct * 100) / len(test_label)
    _precision, _recall, _f1score, _support = ut.get_measures_for_each_class(
        test_label, results)
    print 'Estimator W_VOTING'
    print 'Average Accuracy:\t', _accuracy
    print 'Average Precision:\t', _precision
    print 'Average Recall:\t', _recall
    print 'Average F1 Measure:\t', _f1score

    rf = clf.classifier_randomForest(train_set, train_label)
    results = clf.evaluateResults(rf,
                                  test_set,
                                  test_label,
                                  estimator_name='RF')

    lr = clf.logistic_regression(train_set, train_label)
    results = clf.evaluateResults(lr,
                                  test_set,
                                  test_label,
                                  estimator_name='LR')

    svm = clf.classifier_svm(train_set, train_label)
    results = clf.evaluateResults(svm,
                                  test_set,
                                  test_label,
                                  estimator_name='SVM')

    rbf = clf.rbf_classifier(train_set, train_label)
    results = clf.evaluateResults(rbf,
                                  test_set,
                                  test_label,
                                  estimator_name='RBF')
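The voting, weighted_voting_getlambdas and weighted_voting helpers used above are not shown on this page. A minimal sketch of what they might look like, assuming each row of train_set/test_set holds one non-negative integer class prediction per base classifier, and that the lambdas are simply each classifier's accuracy on the training labels:

import numpy as np

def voting(predictions):
    # Plain majority vote across classifiers (columns) for each sample (row).
    predictions = np.asarray(predictions, dtype=int)
    return np.array([np.bincount(row).argmax() for row in predictions])

def weighted_voting_getlambdas(predictions, labels):
    # One plausible weighting: each classifier's accuracy on the training labels.
    predictions = np.asarray(predictions, dtype=int)
    labels = np.asarray(labels)
    return [(predictions[:, j] == labels).mean() for j in range(predictions.shape[1])]

def weighted_voting(predictions, lambdas):
    # Each classifier's ballot counts lambdas[j] instead of 1; the best-scoring class wins.
    predictions = np.asarray(predictions, dtype=int)
    n_samples = predictions.shape[0]
    scores = np.zeros((n_samples, predictions.max() + 1))
    for j, w in enumerate(lambdas):
        scores[np.arange(n_samples), predictions[:, j]] += w
    return scores.argmax(axis=1)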
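ut.get_measures_for_each_class is not shown either; since its result is unpacked into (precision, recall, f1, support), it is presumably a thin wrapper around scikit-learn's precision_recall_fscore_support, which returns exactly those four per-class arrays. A sketch under that assumption:

from sklearn.metrics import precision_recall_fscore_support

def get_measures_for_each_class(y_true, y_pred):
    # Per-class precision, recall, F1 and support, in sorted label order.
    precision, recall, f1, support = precision_recall_fscore_support(y_true, y_pred)
    return precision, recall, f1, support

clf.evaluateResults, called for the RF/LR/SVM/RBF estimators above, presumably predicts on test_set and reports the same kind of metrics under the given estimator_name.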
Example #3
        tweets = np.array(tweets)
        labels = np.array(labels)

        # Extract tweet partition
        train_tweets, test_tweets, train_labels, test_labels = tweets[train], tweets[test], labels[train], labels[test]

        print len(test_tweets)
        print len(train_tweets)

        train_tweets = np.hstack(train_tweets)
        dictionary, tweets_features, vectorizer = bow.bow(train_tweets, vec="tfidf")
        '''
        Training different classifiers.
        '''
        svm = clf.classifier_svm(tweets_features, train_labels)
        rf = clf.classifier_randomForest(tweets_features, train_labels)
        ada = clf.adaboost(tweets_features, train_labels)
        lr = clf.logistic_regression(tweets_features, train_labels)

        '''
        Test the different classifiers with the test tweets.
        '''

        pred = vectorizer.transform(test_tweets)
        pred = pred.toarray()

        _results, _accuracyLR, _precisionLR, _recallLR, _f_measureLR = clf.evaluateResults(lr, pred, test_labels,
                                                                                           estimator_name='Logistic regression',
                                                                                           file_name=results_folder)
        _results, _accuracyRF, _precisionRF, _recallRF, _f_measureRF = clf.evaluateResults(rf, pred, test_labels,
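bow.bow is another project helper that is not shown here. From how it is called and used, it fits a TF-IDF (or raw count) vectorizer on the training tweets only and returns the vocabulary, the feature matrix and the fitted vectorizer; the test tweets then go through vectorizer.transform, as in the lines above, so their columns line up with the training vocabulary. A minimal sketch, assuming scikit-learn:

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

def bow(train_texts, vec="tfidf"):
    # Fit the vectorizer on the training texts only; the caller reuses it for the test texts.
    vectorizer = TfidfVectorizer() if vec == "tfidf" else CountVectorizer()
    features = vectorizer.fit_transform(train_texts).toarray()
    # Vocabulary terms in column order (term -> column index lives in vectorizer.vocabulary_).
    dictionary = sorted(vectorizer.vocabulary_, key=vectorizer.vocabulary_.get)
    return dictionary, features, vectorizer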
Example #4
    train_tweets = np.hstack(train_tweets)
    dictionary, tweets_features, vectorizer = bow.bow(train_tweets,
                                                      vec="tfidf")
    # dictionary, tweets_features, vectorizer = bow.bow(train_tweets, vec="count")

    # print dictionary
    '''Dimensionality reduction'''
    # LDA
    # lda = clf.lda(tweets_features, train_labels)
    # print tweets_features.shape
    '''
    Training different classifiers.
    '''
    # forest = clf.classifier_randomForest(tweets_features, train_labels)
    svm = clf.classifier_svm(tweets_features, train_labels)
    # mlp = clf.multilayer_perceptron(tweets_features, train_labels)
    # ada = clf.adaboost(tweets_features, train_labels)
    # lr = clf.logistic_regression(tweets_features, train_labels)

    # ONE VS ALL CLASSIFIER WITH DIFFERENT ESTIMATORS.
    # estimator = clf.svm.SVC(random_state=0)
    # oneVSall_svm = clf.onevsall(tweets_features, train_labels, estimator)
    #
    # estimator = clf.MLP()
    # oneVSall_mlp = clf.onevsall(tweets_features, train_labels, estimator)
    #
    # estimator = clf.RandomForestClassifier(n_estimators=50)
    # oneVSall_rf = clf.onevsall(tweets_features, train_labels, estimator)
    '''
    Test the different classifiers with the test tweets.
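The commented-out ONE VS ALL section suggests clf.onevsall wraps an arbitrary base estimator in a one-vs-rest scheme. The function name and call pattern come from those comments; the body below is only an assumed sketch using scikit-learn's OneVsRestClassifier:

from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

def onevsall(features, labels, estimator):
    # Fit one binary copy of the base estimator per class (one-vs-rest).
    ovr = OneVsRestClassifier(estimator)
    ovr.fit(features, labels)
    return ovr

# Mirrors the first commented-out call above:
# oneVSall_svm = onevsall(tweets_features, train_labels, SVC(random_state=0))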