Пример #1
0
    # Excerpt from a larger routine (its header is outside this view): obtain the
    # base classifiers, collect their results on the test split, then read a
    # separate XML test file and tokenize its tweets.
    print '\nTraining Classifiers:\n'
    # forest_cls, svm_cls, rbf_cls, ada_cls, lr_cls = clf.train_classifiers(tweets_features,train_labels)
    # The call below is unpacked into four objects; the commented-out line above
    # is a five-way variant that also included rbf_cls.
    forest_cls, svm_cls, lr_cls, ada_cls = clf.train_classifiers(
        tweets_features, train_labels)
    '''
    Create results dataset from classifiers. Where each attribute is a classifier and each row corresponds to the
    classification of a tweet according to each classifier.
    '''

    print '\nCreating Train set for super classifier ... '
    # Transform the test tweets with `vectorizer` (defined outside this excerpt),
    # then call toarray() on the result (presumably sparse -> dense; confirm
    # against the vectorizer actually in use).
    test_tweet_trans = vectorizer.transform(test_tweets)
    test_tweet_trans = test_tweet_trans.toarray()

    # classifiers = (forest_cls, svm_cls, rbf_cls, ada_cls, lr_cls)
    # Tuple order is forest, svm, lr, ada -- presumably this fixes the column
    # order of train_results; verify in clf.test_classifiers.
    classifiers = (forest_cls, svm_cls, lr_cls, ada_cls)
    # The results on the *test* split are named train_results because, per the
    # message printed above, they form the train set of the super classifier.
    train_results = clf.test_classifiers(test_tweet_trans, test_labels,
                                         classifiers)
    '''
    Train the super classifier on the test set
    '''
    # NOTE(review): no super-classifier training is visible after this string in
    # this excerpt; the statements that follow read and tokenize an XML test file.

    # Hard-coded relative path to the XML test file.
    xmlTestFile = '../DATA/general-tweets-test1k.xml'
    tweets = xml.readXMLTest(xmlTestFile)

    # Build two parallel lists: the tokenized form of each tweet and its id.
    tokenized_tweets = []
    tweetids = []
    for tweet in tweets:
        # ut.tokenize receives both the tweet's content and its polarity.
        tokenized_tweets.append(ut.tokenize(tweet.content, tweet.polarity))
        tweetids.append(tweet.id)

    # `tweets` is rebound to an empty list, so the objects read from the XML
    # file are no longer reachable through that name; how `tweets` and `labels`
    # are filled lies beyond this excerpt.
    tweets = []
    labels = []
Пример #2
0
        '''
        # Excerpt from a larger routine (its header is outside this view): obtain
        # the base classifiers, turn their results on the test and validation
        # splits into train/test sets, and build a super classifier from them.
        print '\nTraining Classifiers:\n'
        # forest_cls, svm_cls, rbf_cls, ada_cls, lr_cls = clf.train_classifiers(tweets_features,train_labels)
        # The call below is unpacked into four objects; the commented-out line
        # above is a five-way variant that also included rbf_cls.
        forest_cls, svm_cls, lr_cls, ada_cls = clf.train_classifiers(tweets_features, train_labels)
        '''
        Create results dataset from classifiers. Where each attribute is a classifier and each row corresponds to the
        classification of a tweet according to each classifier.

        '''
        print '\nCreating Train set for super classifier ... '
        # Transform the test tweets with `vectorizer` (defined outside this
        # excerpt), then call toarray() on the result (presumably sparse ->
        # dense; confirm against the vectorizer actually in use).
        test_tweet_trans = vectorizer.transform(test_tweets)
        test_tweet_trans = test_tweet_trans.toarray()

        # classifiers = (forest_cls, svm_cls, rbf_cls, ada_cls, lr_cls)
        # Tuple order is forest, svm, lr, ada -- presumably this fixes the column
        # order of the result sets; verify in clf.test_classifiers.
        classifiers = (forest_cls, svm_cls, lr_cls, ada_cls)
        # Results on the *test* split become the super classifier's train set.
        train_results = clf.test_classifiers(test_tweet_trans, test_labels, classifiers)

        '''
        Train the super classifier on the test set
        '''
        # NOTE(review): the string above does not describe the next statements.
        # They build test_results from the validation split; the super
        # classifier itself is created further down via clf.rbf_classifier.
        print '\nCreating Test set for super classifier ... '
        val_tweet_trans = vectorizer.transform(validation_tweets)
        val_tweet_trans = val_tweet_trans.toarray()

        # Same classifier tuple as above, applied to the validation split.
        test_results = clf.test_classifiers(val_tweet_trans, validation_labels, classifiers)

        '''
        Now we have a train_results and test_results. Lets train and test a super classifier
        '''
        print '\nTraining super classifier ... '
        # The super classifier is built from train_results (base-classifier
        # results on the test split) paired with test_labels.
        super_clf = clf.rbf_classifier(train_results, test_labels)
Пример #3
0
        # Excerpt from a larger routine (its header is outside this view): obtain
        # the base classifiers, turn their results on the test and validation
        # splits into train/test sets, and build a super classifier from them.
        print '\nTraining Classifiers:\n'
        # forest_cls, svm_cls, rbf_cls, ada_cls, lr_cls = clf.train_classifiers(tweets_features,train_labels)
        # The call below is unpacked into four objects; the commented-out line
        # above is a five-way variant that also included rbf_cls.
        forest_cls, svm_cls, lr_cls, ada_cls = clf.train_classifiers(
            tweets_features, train_labels)
        '''
        Create results dataset from classifiers. Where each attribute is a classifier and each row corresponds to the
        classification of a tweet according to each classifier.

        '''
        print '\nCreating Train set for super classifier ... '
        # Transform the test tweets with `vectorizer` (defined outside this
        # excerpt), then call toarray() on the result (presumably sparse ->
        # dense; confirm against the vectorizer actually in use).
        test_tweet_trans = vectorizer.transform(test_tweets)
        test_tweet_trans = test_tweet_trans.toarray()

        # classifiers = (forest_cls, svm_cls, rbf_cls, ada_cls, lr_cls)
        # Tuple order is forest, svm, lr, ada -- presumably this fixes the column
        # order of the result sets; verify in clf.test_classifiers.
        classifiers = (forest_cls, svm_cls, lr_cls, ada_cls)
        # Results on the *test* split become the super classifier's train set.
        train_results = clf.test_classifiers(test_tweet_trans, test_labels,
                                             classifiers)
        '''
        Train the super classifier on the test set
        '''
        # NOTE(review): the string above does not describe the next statements.
        # They build test_results from the validation split; the super
        # classifier itself is created further down via clf.rbf_classifier.
        print '\nCreating Test set for super classifier ... '
        val_tweet_trans = vectorizer.transform(validation_tweets)
        val_tweet_trans = val_tweet_trans.toarray()

        # Same classifier tuple as above, applied to the validation split.
        test_results = clf.test_classifiers(val_tweet_trans, validation_labels,
                                            classifiers)
        '''
        Now we have a train_results and test_results. Lets train and test a super classifier
        '''
        print '\nTraining super classifier ... '
        # The super classifier is built from train_results (base-classifier
        # results on the test split) paired with test_labels.
        super_clf = clf.rbf_classifier(train_results, test_labels)
    '''
    # NOTE(review): this run of statements follows a lone ''' delimiter whose
    # matching partner is not identifiable in this excerpt; confirm that these
    # lines are live code and not part of a string block in the full file.
    print '\nTraining Classifiers:\n'
    # forest_cls, svm_cls, rbf_cls, ada_cls, lr_cls = clf.train_classifiers(tweets_features,train_labels)
    # The call below is unpacked into four objects; the commented-out line above
    # is a five-way variant that also included rbf_cls.
    forest_cls, svm_cls, lr_cls, ada_cls = clf.train_classifiers(tweets_features, train_labels)
    '''
    Create results dataset from classifiers. Where each attribute is a classifier and each row corresponds to the
    classification of a tweet according to each classifier.
    '''

    print '\nCreating Train set for super classifier ... '
    # Transform the test tweets with `vectorizer` (defined outside this excerpt),
    # then call toarray() on the result (presumably sparse -> dense; confirm
    # against the vectorizer actually in use).
    test_tweet_trans = vectorizer.transform(test_tweets)
    test_tweet_trans = test_tweet_trans.toarray()

    # classifiers = (forest_cls, svm_cls, rbf_cls, ada_cls, lr_cls)
    # Tuple order is forest, svm, lr, ada -- presumably this fixes the column
    # order of train_results; verify in clf.test_classifiers.
    classifiers = (forest_cls, svm_cls, lr_cls, ada_cls)
    # The results on the *test* split are named train_results because, per the
    # message printed above, they form the train set of the super classifier.
    train_results = clf.test_classifiers(test_tweet_trans, test_labels, classifiers)

    '''
    Train the super classifier on the test set
    '''
    # NOTE(review): no super-classifier training is visible after this string in
    # this excerpt; the statements that follow read and tokenize an XML test file.

    # Hard-coded relative path to the XML test file.
    xmlTestFile = '../DATA/general-tweets-test1k.xml'
    tweets = xml.readXMLTest(xmlTestFile)

    # Build two parallel lists: the tokenized form of each tweet and its id.
    tokenized_tweets = []
    tweetids = []
    for tweet in tweets:
        # ut.tokenize receives both the tweet's content and its polarity.
        tokenized_tweets.append(ut.tokenize(tweet.content, tweet.polarity))
        tweetids.append(tweet.id)

    # `tweets` is rebound to an empty list, so the objects read from the XML
    # file are no longer reachable through that name; what fills it next lies
    # beyond this excerpt.
    tweets = []