Example #1
0
    def run_LR(self, rundir=None):
        """ Evaluate the supervised prediction performance of the
            p(t|d) document features (theta.npy) produced by the LDA
            model in `rundir`, using a logistic-regression pipeline.
        """
        # Fall back to the instance's run directory when none is supplied.
        rundir = rundir or self.rundir

        # Unpack the train/test split of the theta features.
        train_part, test_part = load_data(rundir, dataset=NIPSDIR)
        trainX, trainY, train_ids = train_part
        testX, testY, test_ids = test_part

        # Stacked full dataset, used only for the hyperparameter search below.
        allX = np.vstack((trainX, testX))
        allY = np.concatenate((trainY, testY))

        # Lazily determine LR hyperparameters if they were never set.
        if not self.LRparams:
            self.grid_search(allX, allY)

        lr = logistic_regression.train_lrpipe(trainX, trainY, self.LRparams)

        # Titles/labels are indexed by document id to match the test split.
        allLabels = load_labels(NIPSDIR)
        allTitles = load_titles(NIPSDIR)
        evaluate(lr, testX, testY,
                 testTitles=allTitles[test_ids],
                 testLabels=allLabels[test_ids])
Example #2
0
    def run_SVM(self, rundir=None):
        """ Evaluate the supervised prediction performance of the
            p(t|d) document features (theta.npy) produced by the LDA
            model in `rundir`, using a support-vector-machine pipeline.
        """
        # Fall back to the instance's run directory when none is supplied.
        rundir = rundir or self.rundir

        # Unpack the train/test split of the theta features.
        train_part, test_part = load_data(rundir, dataset=NIPSDIR)
        trainX, trainY, train_ids = train_part
        testX, testY, test_ids = test_part

        # Stacked full dataset, used only for the hyperparameter search below.
        allX = np.vstack((trainX, testX))
        allY = np.concatenate((trainY, testY))

        # Lazily determine SVM hyperparameters if they were never set.
        if not self.SVMparams:
            self.grid_search(allX, allY)

        sv = support_vector_machines.train_svpipe(trainX, trainY, self.SVMparams)

        # Titles/labels are indexed by document id to match the test split.
        allLabels = load_labels(NIPSDIR)
        allTitles = load_titles(NIPSDIR)
        evaluate(sv, testX, testY,
                 testTitles=allTitles[test_ids],
                 testLabels=allLabels[test_ids])
from scikits.learn.grid_search import GridSearchCV
from scikits.learn import metrics
from scikits.learn.pipeline import Pipeline

from liblda.supervised.load_data import load_data

import logging
# Module-level logger for the SVM demo script.
logger = logging.getLogger('SVM')
logger.setLevel(logging.INFO)

# Hard-coded, machine-specific paths: the LDA run output directory and the
# NIPS 1-17 corpus directory (note the "CurrentPorjects" typo is in the real path).
rundir = '/Users/ivan/Homes/master/Documents/Projects/runs/reduced1/'
NIPSDIR = "/CurrentPorjects/LatentDirichletAllocation/data/NIPS1-17/"

# Load the train/test split of p(t|d) features without column normalization.
((trainX, trainY, train_ids), (testX, testY,
                               test_ids)) = load_data(rundir,
                                                      dataset=NIPSDIR,
                                                      normalize_cols=False)
# NOTE(review): `np` is used below but numpy is not imported in this fragment —
# presumably `import numpy as np` exists elsewhere; confirm.
allX = np.vstack((trainX, testX))
allY = np.concatenate((trainY, testY))

# Echo the load_data invocation so it appears in the script's stdout log.
print "called:"
print "( (trainX,trainY,train_ids), (testX, testY,test_ids) ) = load_data(rundir, dataset=NIPSDIR, normalize_cols=False)"


def do_grid_search(X, Y, gs_params=None):
    """ Given data (X,Y) will perform a grid search on g_params
        for a LogisticRegression called logreg
        """
    # NOTE(review): despite the docstring, the pipeline wraps an RBF-kernel
    # SVC under the step name 'rbfsvm', not a LogisticRegression — confirm
    # which estimator was intended.
    svpipe = Pipeline([('rbfsvm', SVC())])
    if not gs_params:
        gs_params = {
# NOTE(review): this definition is truncated here — the remainder of the
# default gs_params dict and of do_grid_search is missing from this file
# (the next pasted fragment begins immediately after).
from liblda.supervised.load_data import load_data




import logging
# Configure root logging once for the whole demo script, then create a
# module-level logger for the logistic-regression experiment.
logging.basicConfig(format = '%(asctime)s : %(levelname)s : %(message)s', level = logging.INFO)
logger = logging.getLogger('LogReg')
logger.setLevel(logging.INFO)



# Hard-coded, machine-specific paths: the LDA run output directory and the
# NIPS 1-17 corpus directory (note the "CurrentPorjects" typo is in the real path).
rundir = '/Users/ivan/Homes/master/Documents/Projects/runs/reduced1/'
NIPSDIR = "/CurrentPorjects/LatentDirichletAllocation/data/NIPS1-17/"

# Load the train/test split of p(t|d) features without column normalization.
( (trainX,trainY,train_ids), (testX, testY,test_ids) ) = load_data(rundir, dataset=NIPSDIR, normalize_cols=False)
# NOTE(review): `np` is used below but numpy is not imported in this fragment —
# presumably `import numpy as np` exists elsewhere; confirm.
allX = np.vstack((trainX,testX))
allY = np.concatenate((trainY,testY))

# Echo the load_data invocation so it appears in the script's stdout log.
print "called:"
print "( (trainX,trainY,train_ids), (testX, testY,test_ids) ) = load_data(rundir, dataset=NIPSDIR, normalize_cols=False)"
 
# Grid-search parameter grids for the 'logreg' pipeline step.
# L1 penalty: a wide C range, since L1 regularization prunes features.
LRparametersL1 = {
    'logreg__C': (0.1, 1, 2, 5, 10, 50),
    'logreg__penalty': ('l1',),
}

# L2 penalty: smaller C values (stronger shrinkage) are explored.
LRparametersL2 = {
    'logreg__C': (0.001, 0.01, 0.1, 1),
    'logreg__penalty': ('l2',),
}