Example #1
from sklearn.ensemble import VotingClassifier
from sklearn.preprocessing import LabelEncoder


def make_voter(estimators, y, voting='hard'):
    """Wrap already-fitted estimators in a VotingClassifier without refitting them."""
    estimators = list(estimators.items())
    clf = VotingClassifier(estimators, voting=voting)
    # Bypass fit(): attach the pre-fitted estimators and label encoder by hand
    clf.estimators_ = [estim for name, estim in estimators]
    clf.le_ = LabelEncoder()
    clf.le_.fit(y)
    clf.classes_ = clf.le_.classes_
    return clf
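
A minimal usage sketch, assuming the component models were already fitted elsewhere; the 'lr'/'rf' names and the toy dataset below are illustrative, not part of the original snippet:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=200, random_state=0)
fitted = {
    'lr': LogisticRegression(max_iter=1000).fit(X, y),
    'rf': RandomForestClassifier(n_estimators=50, random_state=0).fit(X, y),
}
voter = make_voter(fitted, y, voting='hard')  # never calls voter.fit()
print(voter.predict(X[:5]))

Note that the hard-voting path relies on the labels already being the integers 0..n_classes-1; with string labels, 'soft' voting is the safer choice for this bypass-fit pattern.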
Example #2
    def _oos_eval(self, clfs, func, *args, meta=False, **kwargs):
        # In the meta case, just run the regular (non-meta) evaluation several times
        if meta:
            oos = []
            # Jackknife with proportionally fewer cases in the meta evaluation
            for _ in range(int(np.ceil(self.n_jack * self.n_oos))):
                clf, tmpoos = self._oos_eval(clfs, func, *args,
                                             meta=False, **kwargs)
                oos.append(tmpoos)  # clf ends up as the last repetition's classifier
            return clf, oos

        # Generate test / oos data
        oos = {}
        Xo, yo, grpo = self._prep_data(self.dat_t, self.tar_t, self.sam_t,
                                       func, *args, **kwargs)

        # Aggregate classifiers across folds and pre-load training
        clf = VotingClassifier(voting='soft',
                               estimators=[(str(i), c) for i, c in enumerate(clfs)])
        clf.estimators_ = clfs
        clf.le_ = LabelEncoder().fit(yo)
        clf.classes_ = clf.le_.classes_

        # Evaluate voting classifier on test data
        pred = clf.predict(Xo)
        oos['true'] = yo
        oos['pred'] = pred
        oos['acc'] = accuracy_score(yo, pred)
        oos['f1'] = f1_score(yo, pred)
        # Compare to mean oos-performance of component classifiers
        comp_preds = [c.predict(Xo) for c in clfs]
        oos['comp_acc'] = np.mean([accuracy_score(yo, cp) for cp in comp_preds])
        oos['comp_f1'] = np.mean([f1_score(yo, cp) for cp in comp_preds])

        f1p, accp = self.performanceP(yo, oos['f1'], oos['acc'])
        oos['p_f1'] = f1p
        oos['p_acc'] = accp
        # Print performance
        if self.verbose:
            print("Y: ", pred, "->", yo)
            print("G: ", grpo)
            print("Test Accuracy: {0} (p <= {1})".format(oos['acc'], accp))
            print("Test F1: {0} (p<= {1})".format(oos['f1'], f1p))

        return clf, oos
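
For context, here is a self-contained sketch of the fold-aggregation idea used above, on toy data (the dataset and all variable names are illustrative): per-fold classifiers are wrapped in a pre-fitted soft VotingClassifier and its held-out accuracy is compared with the mean accuracy of its components.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import LabelEncoder

X, y = make_classification(n_samples=300, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

# One classifier per CV fold, each fitted on that fold's training portion
fold_clfs = [LogisticRegression(max_iter=1000).fit(X_tr[tr_idx], y_tr[tr_idx])
             for tr_idx, _ in KFold(n_splits=5).split(X_tr)]

# Aggregate the fold models without refitting, as in _oos_eval
voter = VotingClassifier(estimators=[(str(i), c) for i, c in enumerate(fold_clfs)],
                         voting='soft')
voter.estimators_ = fold_clfs
voter.le_ = LabelEncoder().fit(y_tr)
voter.classes_ = voter.le_.classes_

print('ensemble acc:', accuracy_score(y_te, voter.predict(X_te)))
print('mean component acc:',
      np.mean([accuracy_score(y_te, c.predict(X_te)) for c in fold_clfs]))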
Example #3
    def fit_voting(self):
        voting = 'soft'
        names = [
            # 'svm(word_n_grams,char_n_grams,all_caps,hashtags,punctuations,punctuation_last,emoticons,emoticon_last,'
            # 'elongated,negation_count)',
            # 'logreg(w2v_doc)',
            # 'logreg(w2v_word_avg_google)',
            'word2vec_bayes',
            'cnn_word(embedding=google)',
            'rnn_word(embedding=google)',
        ]
        # Component models are loaded from precomputed prediction files
        classifiers = [ExternalModel({
            self.val_docs: os.path.join(self.data_dir, 'results/val/{}.json'.format(name)),
            self.test_docs: os.path.join(self.data_dir, 'results/test/{}.json'.format(name)),
        }) for name in names]
        all_scores = []
        for classifier in classifiers:
            scores = classifier.predict_proba(self.val_docs)
            if voting == 'hard':
                scores = Binarizer(threshold=1 / 3).transform(scores)
            all_scores.append(scores)
        all_scores = np.array(all_scores)
        all_scores_first, all_scores_rest = all_scores[0], all_scores[1:]
        le = LabelEncoder().fit(self.classes_)
        val_label_indexes = le.transform(self.val_labels())
        # assume w_0=1 as w is invariant to scaling
        # Maximise validation accuracy of the blend: weight 1 on the first model,
        # non-negative weights on the rest
        w = basinhopping(
            lambda w_: -(val_label_indexes == np.argmax(
                all_scores_first + (all_scores_rest * w_.reshape((len(w_), 1, 1))).sum(axis=0),
                axis=1)).sum(), np.ones(len(classifiers) - 1), niter=1000,
            minimizer_kwargs=dict(method='L-BFGS-B', bounds=[(0, None)] * (len(classifiers) - 1))
        ).x
        w = np.hstack([[1], w])
        w /= w.sum()
        logging.info('w: {}'.format(w))
        # Attach the pre-fitted models directly so the VotingClassifier never needs fit()
        estimator = VotingClassifier(list(zip(names, classifiers)), voting=voting, weights=w)
        estimator.le_ = le
        estimator.estimators_ = classifiers
        return 'vote({})'.format(','.join(names)), estimator
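
The weight search above can be isolated into a small standalone sketch. The probabilities and labels below are randomly generated stand-ins (so no real accuracy gain is expected); the point is only to show the mechanics: the first model's weight is fixed at 1 and the remaining non-negative weights are found with basinhopping.

import numpy as np
from scipy.optimize import basinhopping

rng = np.random.default_rng(0)
n_models, n_samples, n_classes = 3, 200, 3
probas = rng.dirichlet(np.ones(n_classes), size=(n_models, n_samples))  # (models, samples, classes)
labels = rng.integers(0, n_classes, size=n_samples)

first, rest = probas[0], probas[1:]

def neg_accuracy(w_):
    # Blend: weight 1 on the first model, w_ on the remaining models
    blended = first + (rest * w_.reshape(-1, 1, 1)).sum(axis=0)
    return -float((labels == blended.argmax(axis=1)).mean())

res = basinhopping(neg_accuracy, np.ones(n_models - 1), niter=100,
                   minimizer_kwargs=dict(method='L-BFGS-B',
                                         bounds=[(0, None)] * (n_models - 1)))
w = np.hstack([[1], res.x])
w /= w.sum()
print(w)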