예제 #1
0
def doXVal(folds,
           percent,
           verbose,
           multicore,
           noisy,
           predName,
           domain,
           mlnfile,
           dbfiles,
           logicLearn,
           logicInfer,
           inverse=False,
           testSetCount=1):
    """Run a k-fold cross-validation over the given databases.

    A time-stamped result directory (with 'FOL' and 'FUZZY' sub-directories
    and an 'xval.log' file) is created in the current working directory.
    The databases are sampled, shuffled and split into ``folds`` partitions;
    one XValFold is built per fold and executed through runFold, either in
    a multiprocessing pool or sequentially. Afterwards prepareResults is
    called for both 'FOL' and 'FUZZY'.

    :param folds:        number of folds (partitions) to create.
    :param percent:      percentage (0-100) of the loaded databases to use.
    :param verbose:      forwarded to readMLNFromFile.
    :param multicore:    if true, run the folds in a multiprocessing Pool.
    :param noisy:        stored as ``noisyStringDomains`` on the fold params.
    :param predName:     stored as ``queryPred`` on the fold params.
    :param domain:       stored as ``queryDom`` on the fold params.
    :param mlnfile:      MLN file handed to readMLNFromFile.
    :param dbfiles:      iterable of database files handed to readDBFromFile.
    :param logicLearn:   not used by this function.
    :param logicInfer:   stored as ``logicInfer`` on the fold params.
    :param inverse:      not used by this function.
    :param testSetCount: number of partitions moved into each fold's test set.

    Terminates the process via ``exit(1)`` if there are fewer databases than
    folds, or if the multi-process run is interrupted or fails.
    """
    startTime = time.time()

    directory = time.strftime(
        "%a_%d_%b_%Y_%H:%M:%S_K=" + str(folds) + "_TSC=" + str(testSetCount),
        time.localtime())
    os.mkdir(directory)
    os.mkdir(os.path.join(directory, 'FOL'))
    os.mkdir(os.path.join(directory, 'FUZZY'))

    # set up the logger
    # NOTE(review): the handler is never removed again, so repeated calls in
    # one process keep adding file handlers to the 'xval' logger.
    log = logging.getLogger('xval')
    fileLogger = FileHandler(os.path.join(directory, 'xval.log'))
    fileLogger.setFormatter(praclog.formatter)
    log.addHandler(fileLogger)

    log.info('Results will be written into %s' % directory)

    # preparations: Read the MLN and the databases
    mln_ = readMLNFromFile(mlnfile,
                           verbose=verbose,
                           logic='FuzzyLogic',
                           grammar='PRACGrammar')
    log.info('Read MLN %s.' % mlnfile)
    dbs = []
    for dbfile in dbfiles:
        db = readDBFromFile(mln_, dbfile)
        # readDBFromFile may hand back either one database or a list of them
        if isinstance(db, list):
            dbs.extend(db)
        else:
            dbs.append(db)
    log.info('Read %d databases.' % len(dbs))

    # create the partition of data
    subsetLen = int(math.ceil(len(dbs) * percent / 100.0))
    if subsetLen < len(dbs):
        log.info('Using only %d of %d DBs' % (subsetLen, len(dbs)))
    dbs = sample(dbs, subsetLen)

    if len(dbs) < folds:
        log.error(
            'Cannot do %d-fold cross validation with only %d databases.' %
            (folds, len(dbs)))
        # This is a failure, so report a non-zero status like the other
        # error paths of this function do.
        exit(1)

    shuffle(dbs)
    # NOTE(review): because partSize is rounded up, trailing partitions can
    # be empty (e.g. 5 databases in 4 folds yield sizes 2, 2, 1, 0).
    partSize = int(math.ceil(len(dbs) / float(folds)))
    partition = [dbs[i * partSize:(i + 1) * partSize] for i in range(folds)]

    foldRunnables = []
    for foldIdx in range(folds):
        remaining = list(partition)
        params = XValFoldParams()
        params.mln = mln_.duplicate()
        params.testDBs = []
        params.learnDBs = []

        for _ in range(testSetCount):
            # After a deletion the following partition slides into position
            # foldIdx; once foldIdx runs past the end, wrap to the front.
            idx = foldIdx if foldIdx < len(remaining) else 0
            params.testDBs.extend(remaining[idx])
            del remaining[idx]

        # everything that did not end up in the test set is used for learning
        for part in remaining:
            params.learnDBs.extend(part)
        # Single-argument print(...) behaves identically as a Python 2
        # statement and as a Python 3 function call.
        print('LEARN DBS :' + str(len(params.learnDBs)))
        print('TEST DBS :' + str(len(params.testDBs)))

        params.foldIdx = foldIdx
        params.foldCount = folds
        params.noisyStringDomains = noisy
        params.directory = directory
        params.queryPred = predName
        params.queryDom = domain
        params.logicInfer = logicInfer
        foldRunnables.append(XValFold(params))

    if multicore:
        # set up a pool of worker processes
        # The None sentinel keeps the interrupt handler below from raising a
        # NameError when Pool() itself is what got interrupted.
        workerPool = None
        try:
            workerPool = Pool()
            log.info('Starting %d-fold Cross-Validation in %d processes.' %
                     (folds, workerPool._processes))
            result = workerPool.map_async(runFold, foldRunnables).get()
            workerPool.close()
            workerPool.join()
            # NOTE(review): the combined matrix is not used after this loop
            # within this function; kept so that malformed fold results
            # still surface here as before.
            cm = ConfusionMatrix()
            for r in result:
                cm.combine(r.confMatrix)
            elapsedTime = time.time() - startTime
            prepareResults(directory, 'FOL')
            prepareResults(directory, 'FUZZY')
        except (KeyboardInterrupt, SystemExit, SystemError):
            log.critical("Caught KeyboardInterrupt, terminating workers")
            if workerPool is not None:
                workerPool.terminate()
                workerPool.join()
            exit(1)
        except Exception:
            log.error('\n' + traceback.format_exc())
            exit(1)
    else:
        log.info('Starting %d-fold Cross-Validation in 1 process.' % (folds))

        for fold in foldRunnables:
            runFold(fold)

        prepareResults(directory, 'FOL')
        prepareResults(directory, 'FUZZY')

        elapsedTime = time.time() - startTime

    mode = 'MP' if multicore else 'SP'
    log.info('%d-fold crossvalidation (%s) took %.2f min' %
             (folds, mode, elapsedTime / 60.0))
예제 #2
0
        params.querypred = predname
        foldRunnables.append(XValFold(params))
        logger.info('Params for fold %d:\n%s' % (fold_idx, str(params)))

    if multicore:
        # set up a pool of worker processes
        try:
            workerPool = Pool()
            logger.info('Starting %d-fold Cross-Validation in %d processes.' %
                        (folds, workerPool._processes))
            result = workerPool.map_async(runFold, foldRunnables).get()
            workerPool.close()
            workerPool.join()
            cm = ConfusionMatrix()
            for r in result:
                cm.combine(r.confmat)
            elapsedTimeMP = time.time() - startTime
            cm.toFile(os.path.join(expdir, 'conf_matrix.cm'))
            # create the pdf table and move it into the log directory
            # this is a dirty hack since pdflatex apparently
            # does not support arbitrary output paths
            pdfname = 'conf_matrix'
            logger.info('creating pdf if confusion matrix...')
            cm.toPDF(pdfname)
            os.rename('%s.pdf' % pdfname,
                      os.path.join(expdir, '%s.pdf' % pdfname))
        except (KeyboardInterrupt, SystemExit, SystemError):
            logger.critical("Caught KeyboardInterrupt, terminating workers")
            workerPool.terminate()
            workerPool.join()
            exit(1)
예제 #3
0
def prepareResults(directory, logic):
    """Merge the pickled confusion matrices of one logic into a single file.

    Every file found in ``<directory>/<logic>`` is unpickled and combined
    into one ConfusionMatrix, which is then written to
    ``<directory>/<logic>/conf_matrix.cm``.

    :param directory: root directory of the cross-validation run.
    :param logic:     name of the sub-directory holding the pickled results.
    """
    resultDir = os.path.join(directory, logic)
    outName = 'conf_matrix.cm'
    cm = ConfusionMatrix()
    for f in os.listdir(resultDir):
        if f == outName:
            # Written by an earlier call of this function; it is the merged
            # output, not an input, and must not be combined again.
            continue
        # SECURITY: pickle.load can execute arbitrary code. Only point this
        # function at directories whose contents were produced by this tool.
        # The context manager closes the file handle even if loading fails.
        with open(os.path.join(resultDir, f), 'rb') as stream:
            matrix = pickle.load(stream)
        cm.combine(matrix)
    cm.toFile(os.path.join(resultDir, outName))
예제 #4
0
파일: xval.py 프로젝트: Bovril/pracmln
     params.queryconf = project.queryconf
     params.querypred = predname
     foldRunnables.append(XValFold(params))
     logger.info('Params for fold %d:\n%s' % (fold_idx, str(params)))
 
 if multicore:
     # set up a pool of worker processes
     try:
         workerPool = Pool()
         logger.info('Starting %d-fold Cross-Validation in %d processes.' % (folds, workerPool._processes))
         result = workerPool.map_async(runFold, foldRunnables).get()
         workerPool.close()
         workerPool.join()
         cm = ConfusionMatrix()
         for r in result:
             cm.combine(r.confmat)
         elapsedTimeMP = time.time() - startTime
         cm.toFile(os.path.join(expdir, 'conf_matrix.cm'))
         # create the pdf table and move it into the log directory
         # this is a dirty hack since pdflatex apparently
         # does not support arbitrary output paths
         pdfname = 'conf_matrix'
         logger.info('creating pdf if confusion matrix...')
         cm.toPDF(pdfname)
         os.rename('%s.pdf' % pdfname, os.path.join(expdir, '%s.pdf' % pdfname))
     except (KeyboardInterrupt, SystemExit, SystemError):
         logger.critical("Caught KeyboardInterrupt, terminating workers")
         workerPool.terminate()
         workerPool.join()
         exit(1)
     except: