Пример #1
0
def _report_voting_measures(estimator_name, expected, predicted):
    '''Print accuracy, precision, recall and F1 for one set of predictions.

    Args:
        estimator_name: Text printed after ``Estimator`` in the header line.
        expected: Reference labels. Assumed to be a numpy array aligned
            element-wise with ``predicted`` -- TODO confirm against callers.
        predicted: Labels produced by the estimator being reported.
    '''
    matches = predicted == expected
    # Float arithmetic keeps the fractional part of the percentage; the
    # integer form ``(correct * 100) / n`` truncates under Python 2 division.
    accuracy = 100.0 * matches.sum() / len(expected)
    precision, recall, f1score, _ = ut.get_measures_for_each_class(
        expected, predicted)
    # One pre-formatted string per line prints identically with the Python 2
    # print statement and the Python 3 print function.
    print('Estimator %s' % estimator_name)
    print('Average Accuracy:\t%s' % (accuracy,))
    print('Average Precision:\t%s' % (precision,))
    print('Average Recall:\t%s' % (recall,))
    print('Average F1 Measure:\t%s' % (f1score,))


def supperclassify(train_set, train_label, test_set, test_label):
    '''Report the scores of several combiners built on the same features.

    Runs, in order: plain voting, weighted voting, and the random forest,
    logistic regression, SVM and RBF classifiers from ``clf``. Every result
    is printed; nothing is returned.

    Args:
        train_set: Features used to fit the weighted voting lambdas and the
            ``clf`` classifiers.
        train_label: Labels matching ``train_set``.
        test_set: Features the fitted estimators are scored on.
        test_label: Labels matching ``test_set``.
    '''
    # Plain voting is applied directly to train_set (no labels are passed to
    # it) and is scored against train_label, unlike the estimators below.
    _report_voting_measures('VOTING', train_label, voting(train_set))
    print('\n')

    lambdas = weighted_voting_getlambdas(train_set, train_label)
    _report_voting_measures('W_VOTING', test_label,
                            weighted_voting(test_set, lambdas))

    # Each classifier is fitted on the training split and then handed to
    # clf.evaluateResults together with the test split and its display name.
    trainers = (
        ('RF', clf.classifier_randomForest),
        ('LR', clf.logistic_regression),
        ('SVM', clf.classifier_svm),
        ('RBF', clf.rbf_classifier),
    )
    for estimator_name, train in trainers:
        model = train(train_set, train_label)
        clf.evaluateResults(model,
                            test_set,
                            test_label,
                            estimator_name=estimator_name)
Пример #2
0
def _report_voting_measures(estimator_name, expected, predicted):
    '''Print accuracy, precision, recall and F1 for one set of predictions.

    Args:
        estimator_name: Text printed after ``Estimator`` in the header line.
        expected: Reference labels. Assumed to be a numpy array aligned
            element-wise with ``predicted`` -- TODO confirm against callers.
        predicted: Labels produced by the estimator being reported.
    '''
    matches = predicted == expected
    # Float arithmetic keeps the fractional part of the percentage; the
    # integer form ``(correct * 100) / n`` truncates under Python 2 division.
    accuracy = 100.0 * matches.sum() / len(expected)
    precision, recall, f1score, _ = ut.get_measures_for_each_class(expected, predicted)
    # One pre-formatted string per line prints identically with the Python 2
    # print statement and the Python 3 print function.
    print('Estimator %s' % estimator_name)
    print('Average Accuracy:\t%s' % (accuracy,))
    print('Average Precision:\t%s' % (precision,))
    print('Average Recall:\t%s' % (recall,))
    print('Average F1 Measure:\t%s' % (f1score,))


def supperclassify(train_set, train_label, test_set, test_label):
    '''Report the scores of several combiners built on the same features.

    Runs, in order: plain voting, weighted voting, and the random forest,
    logistic regression, SVM and RBF classifiers from ``clf``. Every result
    is printed; nothing is returned.

    Args:
        train_set: Features used to fit the weighted voting lambdas and the
            ``clf`` classifiers.
        train_label: Labels matching ``train_set``.
        test_set: Features the fitted estimators are scored on.
        test_label: Labels matching ``test_set``.
    '''
    # Plain voting is applied directly to train_set (no labels are passed to
    # it) and is scored against train_label, unlike the estimators below.
    _report_voting_measures('VOTING', train_label, voting(train_set))
    print('\n')

    lambdas = weighted_voting_getlambdas(train_set, train_label)
    _report_voting_measures('W_VOTING', test_label, weighted_voting(test_set, lambdas))

    # Each classifier is fitted on the training split and then handed to
    # clf.evaluateResults together with the test split and its display name.
    trainers = (
        ('RF', clf.classifier_randomForest),
        ('LR', clf.logistic_regression),
        ('SVM', clf.classifier_svm),
        ('RBF', clf.rbf_classifier),
    )
    for estimator_name, train in trainers:
        model = train(train_set, train_label)
        clf.evaluateResults(model, test_set, test_label, estimator_name=estimator_name)
Пример #3
0
        # Keep only the 'clean' field of each tweet; label collection is
        # disabled in this excerpt (the loop header is outside this view).
        tweets.append(tweet['clean'])
        # labels.append(tweet['class'])

    tweets_SEPLN = np.array(tweets)
    # labels_SEPLN = np.array(labels)

    print '\nCreating Test set for super classifier ... '
    # Reuse the existing vectorizer; toarray() suggests transform() returns a
    # sparse matrix that is densified here -- TODO confirm.
    val_tweet_trans = vectorizer.transform(tweets_SEPLN)
    val_tweet_trans = val_tweet_trans.toarray()

    # 0 is passed in the labels position -- presumably a placeholder because
    # no labels are collected above; verify against clf.test_classifiers.
    SEPLN_results = clf.test_classifiers(val_tweet_trans, 0, classifiers)
    '''
    Now we have a train_results and test_results. Lets train and test a super classifier
    '''
    print '\nTraining super classifier ... '
    # train_results and test_labels are defined before this excerpt.
    super_clf = clf.rbf_classifier(train_results, test_labels)

    print '\nEvaluating Super classifier ... '
    # Predictions of the super classifier on the base classifiers' outputs.
    rbf_results = super_clf.predict(SEPLN_results)
    # rbf_results, _, _, _, _ = clf.evaluateResults(super_clf, train_results, test_labels)
    # validation_labels,
    # estimator_name='Supper Classifier')
    # Imported locally, at the point of use, rather than at module level.
    import classify_diagnosis as cd

    # Weighted voting: lambdas are derived from (train_results, test_labels)
    # and then applied to the SEPLN outputs.
    lambdas = cd.weighted_voting_getlambdas(train_results, test_labels)
    w_results = cd.weighted_voting(SEPLN_results, lambdas)

    # Plain voting over the same SEPLN outputs.
    v_results = diagnose.voting(SEPLN_results)

    # Four-label set in use; the six-label variant is kept commented out.
    # polarity = np.array(['NONE', 'N+', 'N', 'NEU', 'P', 'P+'])
    polarity = np.array(['NONE', 'N', 'NEU', 'P'])
Пример #4
0
        # Output of clf.test_classifiers on the test tweets; it is used below
        # as the training input of the super classifier.
        train_results = clf.test_classifiers(test_tweet_trans, test_labels, classifiers)

        '''
        Train the super classifier on the test set
        '''
        print '\nCreating Test set for super classifier ... '
        # toarray() suggests transform() returns a sparse matrix that is
        # densified here -- TODO confirm.
        val_tweet_trans = vectorizer.transform(validation_tweets)
        val_tweet_trans = val_tweet_trans.toarray()

        # Output of clf.test_classifiers on the validation tweets; it is the
        # evaluation input of the super classifier.
        test_results = clf.test_classifiers(val_tweet_trans, validation_labels, classifiers)

        '''
        Now we have a train_results and test_results. Lets train and test a super classifier
        '''
        print '\nTraining super classifier ... '
        # Fitted on (train_results, test_labels), i.e. on the test partition.
        super_clf = clf.rbf_classifier(train_results, test_labels)

        print '\nEvaluating Super classifier ... '
        # The five returned values are not used again in the visible code.
        results, accuracy, precision, recall, f_measure = clf.evaluateResults(super_clf, test_results,
                                                                              validation_labels,
                                                                              estimator_name='Supper Classifier')
        # j is defined outside this view -- presumably the partition index.
        print '\n\nSuperClassify partition', j, '\n'
        # Hands the same train/evaluation inputs to diagnose.supperclassify.
        diagnose.supperclassify(train_results, test_labels, test_results, validation_labels)


    # np.savetxt("train_results_3.csv", train_results, delimiter=",")
    # np.savetxt("train_labels_3.csv", test_labels, delimiter=",")
    # np.savetxt("test_results_3.csv", test_results, delimiter=",")
    # np.savetxt("test_labels_3.csv", validation_labels, delimiter=",")

    # import pdb; pdb.set_trace()