Exemplo n.º 1
0
 def run(self):
     """Tune the regularisation value on the validation split, then test.

     For every candidate value a classifier is trained on
     ``self.X['train']`` (an ``MLP`` when ``self.usepytorch`` is truthy,
     a ``LogisticRegression`` otherwise) and scored on ``self.X['valid']``.
     The candidate with the highest validation score is used to train a
     fresh classifier, which is then evaluated on ``self.X['test']``.

     When ``self.noreg`` is truthy the search collapses to a single value
     (``1e-9`` as ``l2reg`` for the MLP, ``1e9`` as ``C`` otherwise).

     Returns:
         tuple: ``(devaccuracy, testaccuracy, pred, probs)`` where the two
         accuracies are scores scaled by 100 and rounded to two decimals,
         ``pred`` holds one list per test example with its prediction and
         ``probs`` one list per test example with its probability row.
     """
     logging.debug('classifier_config: %s', self.classifier_config)
     logging.info('Training {0} with standard validation..'.format(
         self.modelname))

     def fit_classifier(reg):
         # Build and train one classifier for the given regularisation
         # value; shared by the parameter search and the final fit.
         if self.usepytorch:
             model = MLP(self.classifier_config,
                         inputdim=self.featdim,
                         nclasses=self.nclasses,
                         l2reg=reg,
                         seed=self.seed,
                         cudaEfficient=self.cudaEfficient)
             # TODO: Find a hack for reducing nb epoches in SNLI
             model.fit(self.X['train'],
                       self.y['train'],
                       validation_data=(self.X['valid'], self.y['valid']))
         else:
             model = LogisticRegression(C=reg, random_state=self.seed)
             model.fit(self.X['train'], self.y['train'])
         return model

     if self.noreg:
         regs = [1e-9 if self.usepytorch else 1e9]
     elif self.usepytorch:
         regs = [10**t for t in range(-5, -1)]
     else:
         regs = [2**t for t in range(-2, 4, 1)]

     scores = [
         round(100 * fit_classifier(reg).score(self.X['valid'],
                                               self.y['valid']), 2)
         for reg in regs
     ]
     logging.info([('reg:' + str(reg), score)
                   for reg, score in zip(regs, scores)])
     optreg = regs[np.argmax(scores)]
     devaccuracy = np.max(scores)
     logging.info('Validation : best param found is reg = {0} with score \
         {1}'.format(optreg, devaccuracy))

     logging.info('Evaluating...')
     clf = fit_classifier(optreg)
     if self.usepytorch:
         # The MLP's predict() is unpacked as a (labels, probabilities) pair.
         yhat, yprobs = clf.predict(self.X['test'])
     else:
         # LogisticRegression.predict() yields labels only, so the
         # probabilities are requested separately.  Labels are reshaped to
         # one-element rows so that each entry of `pred` is a list here
         # too -- assumes the MLP path yields one row per example; TODO
         # confirm against MLP.predict.
         yhat = np.asarray(clf.predict(self.X['test'])).reshape(-1, 1)
         yprobs = clf.predict_proba(self.X['test'])

     probs = [list(row) for row in yprobs]
     pred = [list(row) for row in yhat]
     logging.debug('probs: %s', probs)
     logging.debug('pred: %s', pred)

     testaccuracy = round(100 * clf.score(self.X['test'], self.y['test']), 2)
     return devaccuracy, testaccuracy, pred, probs
Exemplo n.º 2
0
 def run(self):
     """Choose the regularisation value by k-fold cross-validation, then test.

     Each candidate value is scored as the mean accuracy over
     ``self.k`` stratified, shuffled folds of ``self.train``.  The best
     candidate is used to train a classifier on the whole of
     ``self.train``, which is then applied to ``self.test``.

     Returns:
         tuple: ``(devaccuracy, testaccuracy, yhat)`` -- the best mean
         cross-validation score and the test score (both scaled by 100
         and rounded to two decimals), plus the output of ``predict`` on
         the test inputs.
     """
     logging.info('Training {0} with {1}-fold cross-validation'.format(
         self.modelname, self.k))

     # Candidate grid: l2reg values for the MLP, C values otherwise.
     if self.usepytorch:
         regs = [10**t for t in range(-5, -1)]
     else:
         regs = [2**t for t in range(-1, 6, 1)]

     folds = StratifiedKFold(n_splits=self.k,
                             shuffle=True,
                             random_state=self.seed)
     features, labels = self.train['X'], self.train['y']

     # Parameter search: one mean fold score per candidate.
     scores = []
     for reg in regs:
         fold_scores = []
         for fit_idx, held_idx in folds.split(features, labels):
             X_fit, y_fit = features[fit_idx], labels[fit_idx]
             X_held, y_held = features[held_idx], labels[held_idx]
             if self.usepytorch:
                 clf = MLP(self.classifier_config,
                           inputdim=self.featdim,
                           nclasses=self.nclasses,
                           l2reg=reg,
                           seed=self.seed)
                 clf.fit(X_fit, y_fit, validation_data=(X_held, y_held))
             else:
                 clf = LogisticRegression(C=reg, random_state=self.seed)
                 clf.fit(X_fit, y_fit)
             fold_scores.append(clf.score(X_held, y_held))
         scores.append(round(100 * np.mean(fold_scores), 2))

     logging.info([('reg:' + str(reg), score)
                   for reg, score in zip(regs, scores)])
     optreg = regs[np.argmax(scores)]
     devaccuracy = np.max(scores)
     logging.info('Cross-validation : best param found is reg = {0} \
         with score {1}'.format(optreg, devaccuracy))

     # Final fit on the full training set with the selected value.
     logging.info('Evaluating...')
     if self.usepytorch:
         clf = MLP(self.classifier_config,
                   inputdim=self.featdim,
                   nclasses=self.nclasses,
                   l2reg=optreg,
                   seed=self.seed)
         clf.fit(features, labels, validation_split=0.05)
     else:
         clf = LogisticRegression(C=optreg, random_state=self.seed)
         clf.fit(features, labels)

     test_inputs = self.test['X']
     yhat = clf.predict(test_inputs)
     testaccuracy = round(100 * clf.score(test_inputs, self.test['y']), 2)
     return devaccuracy, testaccuracy, yhat
Exemplo n.º 3
0
    def run(self):
        """Run nested stratified k-fold cross-validation.

        The outer loop splits ``self.X`` / ``self.y`` into ``self.k`` folds.
        For each outer split, every candidate regularisation value is scored
        by its mean accuracy over an inner ``self.k``-fold split of the outer
        training part; the best candidate is then used to train on the whole
        outer training part and is scored on the outer test part.

        Per-split results are appended to ``self.devresults``,
        ``self.testresults`` and, when ``self.nclasses == 2``,
        ``self.f1results``; the returned values are means over whatever those
        lists contain after this call.

        Returns:
            tuple: ``(devaccuracy, testaccuracy, testf1)``, each rounded to
            two decimals; ``testf1`` is ``None`` when ``self.f1results`` is
            empty.
        """
        logging.info(
            'Training {0} with (inner) {1}-fold cross-validation'.format(
                self.modelname, self.k))

        # Candidate grid: l2reg values for the MLP, C values otherwise.
        if self.usepytorch:
            regs = [10**t for t in range(-5, -1)]
        else:
            regs = [2**t for t in range(-2, 4, 1)]

        # Outer and inner splitters are configured identically.
        splitter_args = dict(n_splits=self.k, shuffle=True, random_state=1111)
        skf = StratifiedKFold(**splitter_args)
        innerskf = StratifiedKFold(**splitter_args)

        for count, (train_idx, test_idx) in enumerate(
                skf.split(self.X, self.y), start=1):
            X_train, y_train = self.X[train_idx], self.y[train_idx]
            X_test, y_test = self.X[test_idx], self.y[test_idx]

            # Inner search: one mean fold score per candidate.
            scores = []
            for reg in regs:
                fold_scores = []
                for fit_idx, held_idx in innerskf.split(X_train, y_train):
                    X_fit, y_fit = X_train[fit_idx], y_train[fit_idx]
                    X_held, y_held = X_train[held_idx], y_train[held_idx]
                    if self.usepytorch:
                        clf = MLP(self.classifier_config,
                                  inputdim=self.featdim,
                                  nclasses=self.nclasses,
                                  l2reg=reg,
                                  seed=self.seed)
                        clf.fit(X_fit,
                                y_fit,
                                validation_data=(X_held, y_held))
                    else:
                        clf = LogisticRegression(C=reg, random_state=self.seed)
                        clf.fit(X_fit, y_fit)
                    fold_scores.append(clf.score(X_held, y_held))
                scores.append(round(100 * np.mean(fold_scores), 2))

            optreg = regs[np.argmax(scores)]
            best_score = np.max(scores)
            logging.info('Best param found at split {0}: l2reg = {1} \
                with score {2}'.format(count, optreg, best_score))
            self.devresults.append(best_score)

            # Refit on the full outer training part with the selected value.
            if self.usepytorch:
                clf = MLP(self.classifier_config,
                          inputdim=self.featdim,
                          nclasses=self.nclasses,
                          l2reg=optreg,
                          seed=self.seed)
                clf.fit(X_train, y_train, validation_split=0.05)
            else:
                clf = LogisticRegression(C=optreg, random_state=self.seed)
                clf.fit(X_train, y_train)

            yhat = clf.predict(X_test)
            if self.nclasses == 2:
                split_f1 = f1_score(yhat, y_test)
                self.f1results.append(round(100 * split_f1, 2))

            split_accuracy = clf.score(X_test, y_test)
            self.testresults.append(round(100 * split_accuracy, 2))

        devaccuracy = round(np.mean(self.devresults), 2)
        testaccuracy = round(np.mean(self.testresults), 2)
        testf1 = round(np.mean(self.f1results), 2) if self.f1results else None
        return devaccuracy, testaccuracy, testf1