Python GroundTruth.load示例，gaia2.classification.GroundTruth.load Python示例

示例#1

0

显示文件

文件： classificationtask.py 项目： shudct/gaia

    def run(self, className, outfilename, param, dsname, gtname, evalconfig):
        """Run every configured evaluation for one classifier configuration.

        Loads the ground truth named ``gtname`` and the dataset named
        ``dsname``, drops ground-truth entries whose point is not in the
        dataset, then runs one N-fold evaluation per entry of ``evalconfig``.
        Evaluation ``i`` writes ``<outfilename>_<i>.param`` (YAML dump of the
        model and evaluation parameters) and ``<outfilename>_<i>.result`` (the
        saved confusion result). An evaluation whose ``.result`` file already
        exists is skipped.

        Args:
            className: class name forced onto the loaded ground truth.
            outfilename: prefix of the per-evaluation output files.
            param: model parameters; 'classifier' and 'preprocessing' are read,
                and the whole mapping is logged through ``json.dumps``.
            dsname: name handed to ``DataSet.load``.
            gtname: name handed to ``GroundTruth.load``.
            evalconfig: sequence of evaluation parameter mappings, each
                providing 'nfold'.

        Raises:
            Exception: whatever the evaluation raised; it is logged together
                with ``param`` and ``evalconfig`` and then re-raised.
        """
        try:
            classifier = param['classifier']
            gt = GroundTruth(classifier)
            gt.load(gtname)

            # force the GroundTruth class name to be the one specified by our project file, not
            # the one in the original groundtruth file
            gt.className = className

            ds = DataSet()
            ds.load(dsname)

            # some points may have failed to be analyzed, remove those from the GroundTruth.
            # Build the set once so each membership test is O(1) instead of a
            # scan over all point names.
            knownPoints = set(ds.pointNames())
            # iterate over a snapshot of the keys because entries are deleted in the loop
            for pid in list(gt.keys()):
                if pid not in knownPoints:
                    log.warning(
                        'Removing %s from GroundTruth as it could not be found in the merged dataset'
                        % pid)
                    del gt[pid]

            # NOTE(review): the dataset returned by getTrainer is discarded and the
            # evaluation below runs on ds -- confirm this is intended.
            trainerFun, trainingparam, _ = getTrainer(classifier, param, ds)

            # run all the evaluations specified in the evaluation config
            for i, evalparam in enumerate(evalconfig):
                # if we already ran this evaluation, no need to run it again...
                resultFilename = outfilename + '_%d.result' % i
                if exists(resultFilename):
                    log.info('File %s already exists. Skipping evaluation...' %
                             resultFilename)
                    continue

                log.info(
                    'Running evaluation %d for: %s with classifier %s and dataset %s'
                    % (i, outfilename, param['classifier'],
                       param['preprocessing']))
                log.info('    PID: %d, parameters: %s' %
                         (os.getpid(), json.dumps(param)))

                # run evaluation
                confusion = evaluateNfold(evalparam['nfold'], ds, gt,
                                          trainerFun, **trainingparam)

                # write evaluation params & result
                with open(outfilename + '_%d.param' % i, 'w') as f:
                    yaml.dump({'model': param, 'evaluation': evalparam}, f)

                confusion.save(resultFilename)

        except Exception:
            # log the failing configuration, then let the caller see the error
            log.error(
                'While doing evaluation with param = %s\nevaluation = %s' %
                (param, evalconfig))
            raise

示例#2

0

显示文件

文件： classificationtask.py 项目： MTG/gaia

    def run(self, className, outfilename, param, dsname, gtname, evalconfig):
        """Evaluate one classifier configuration against all evaluation setups.

        Steps, in order:

        1. load the ground truth (``gtname``) and override its class name with
           ``className``;
        2. load the dataset (``dsname``) and remove from the ground truth every
           point id that the dataset does not contain;
        3. obtain the trainer function and its parameters from ``getTrainer``;
        4. for each ``evalconfig[i]`` run ``evaluateNfold`` and write
           ``<outfilename>_<i>.param`` and ``<outfilename>_<i>.result``, unless
           the ``.result`` file is already present.

        Args:
            className: class name to set on the ground truth.
            outfilename: common prefix of the generated files.
            param: model parameters (reads 'classifier' and 'preprocessing';
                dumped with ``json.dumps`` for logging).
            dsname: argument for ``DataSet.load``.
            gtname: argument for ``GroundTruth.load``.
            evalconfig: sequence of mappings, each with an 'nfold' entry.

        Raises:
            Exception: any error raised while evaluating is logged with the
                parameters in use and re-raised unchanged.
        """
        try:
            classifier = param['classifier']
            gt = GroundTruth(classifier)
            gt.load(gtname)

            # force the GroundTruth class name to be the one specified by our project file, not
            # the one in the original groundtruth file
            gt.className = className

            ds = DataSet()
            ds.load(dsname)

            # some points may have failed to be analyzed, remove those from the GroundTruth.
            # A set makes the per-id lookup constant time; the original scanned the
            # whole point-name sequence for every ground-truth id.
            datasetPoints = set(ds.pointNames())
            missing = [pid for pid in gt.keys() if pid not in datasetPoints]
            for pid in missing:
                log.warning('Removing %s from GroundTruth as it could not be found in the merged dataset' % pid)
                del gt[pid]

            # NOTE(review): getTrainer also returns a dataset, which is ignored here;
            # evaluateNfold receives ds -- verify that this is on purpose.
            trainerFun, trainingparam, _ = getTrainer(classifier, param, ds)

            # run all the evaluations specified in the evaluation config
            for i, evalparam in enumerate(evalconfig):
                # if we already ran this evaluation, no need to run it again...
                resultFilename = outfilename + '_%d.result' % i
                if exists(resultFilename):
                    log.info('File %s already exists. Skipping evaluation...' % resultFilename)
                    continue

                log.info('Running evaluation %d for: %s with classifier %s and dataset %s' % (i, outfilename,
                                                                                              param['classifier'],
                                                                                              param['preprocessing']))
                log.info('    PID: %d, parameters: %s' % (os.getpid(), json.dumps(param)))

                # run evaluation
                confusion = evaluateNfold(evalparam['nfold'], ds, gt, trainerFun, **trainingparam)

                # write evaluation params & result
                with open(outfilename + '_%d.param' % i, 'w') as f:
                    yaml.dump({ 'model': param, 'evaluation': evalparam }, f)

                confusion.save(resultFilename)

        except Exception:
            # record which configuration failed before propagating the error
            log.error('While doing evaluation with param = %s\nevaluation = %s' % (param, evalconfig))
            raise

示例#3

0

显示文件

文件： validate_collection.py 项目： vishalbelsare/gaia

def _load_yaml(path):
    """Parse the YAML file at *path* and return the resulting object."""
    # safe_load replaces the original yaml.load(...) call: called without an
    # explicit Loader, yaml.load may construct arbitrary Python objects and is
    # rejected with a TypeError by PyYAML >= 6. The `with` block also closes
    # the file handle, which the original left open.
    with open(path) as f:
        return yaml.safe_load(f.read())


def validate(basedir):
    """Check that *basedir* contains a consistent collection layout.

    The following checks are made, in order, and progress is printed:

    * ``basedir`` and ``metadata/config.yaml`` exist and the config has a
      'version' entry;
    * the number of entries in ``audio/`` equals the number of entries in the
      config's 'audioFormats';
    * every audio format has a folder under ``audio/``, defines a 'filelist',
      and every file named in that filelist exists;
    * every id of every ground-truth file listed under the optional
      'groundTruth' config entry appears in the filelist of every audio format.

    Args:
        basedir: root directory of the collection.

    Raises:
        Exception: on the first check that fails, with a message describing it.
        KeyError: if config.yaml has no 'audioFormats' entry.
    """
    # let's start with some basic check
    print('Checking basic directory layout...')
    if not exists(basedir):
        raise Exception('The specified base directory does not exist')

    # check required metadata files are there and that they're valid
    configFile = join(basedir, 'metadata', 'config.yaml')
    if not exists(configFile):
        raise Exception(
            'config.yaml could not be found in the metadata/ folder')

    config = _load_yaml(configFile)
    if 'version' not in config:
        raise Exception('config.yaml doesn\'t have a version number')

    # check that the specified audioFormats correspond to the audio/ subfolders
    print('Checking available audio formats...')
    audioFormats = config['audioFormats']
    if not audioFormats:
        raise Exception('audioFormats not specified in config.yaml')

    audioFolders = [
        basename(f) for f in glob.glob(join(basedir, 'audio', '*'))
    ]

    # only the counts are compared here; the per-format folder check is below
    if len(audioFolders) != len(audioFormats):
        raise Exception(
            'Some audio folders are not described in the audioFormats section of the config.yaml'
        )

    print('Found formats:', str(audioFolders))

    # check the audio formats are valid, in particular that they have a valid filelist.
    # Parsed filelists are kept so the groundtruth check does not have to read
    # and parse the same files again for every groundtruth.
    filelists = {}
    for fmt, desc in audioFormats.items():
        print("\nChecking format '%s':" % fmt)
        # TODO: at some point in the future we should also check for valid values in desc
        if not exists(join(basedir, 'audio', fmt)):
            raise Exception(
                '%s is listed as an audio format, but doesn\'t appear in the audio/ folder'
                % fmt)

        if 'filelist' not in desc:
            raise Exception('Audio format "%s" does not define a filelist' %
                            fmt)

        filelist = _load_yaml(join(basedir, 'metadata', desc['filelist']))
        print('  filelist OK, lists %d files' % len(filelist))
        filelists[fmt] = filelist

        for filename in filelist.values():
            fullpath = join(basedir, 'audio', fmt, filename)
            if not exists(fullpath):
                raise Exception(
                    'For format "%s": file "%s" appears in filelist, however there is no corresponding "%s"'
                    % (fmt, filename, fullpath))

    # check that the groundtruth files do actually exist if they are specified
    print('\nChecking groundtruth files...')
    groundTruth = config.get('groundTruth', {})
    print('Found groundtruth files:', str(list(groundTruth.keys())))
    for name, gtfile in groundTruth.items():
        print("\nChecking groundtruth '%s':" % name)
        gt = GroundTruth('')
        gt.load(join(basedir, 'metadata', gtfile))
        # check that the IDs used in the groundtruth files exist in all the filelists
        for afname in audioFormats:
            flist = filelists[afname]
            for gid in gt:
                if gid not in flist:
                    raise Exception(
                        "ID '%s' is in GroundTruth '%s', but could not be found in filelist for audio format '%s'"
                        % (gid, gtfile, afname))
        print('  gt filelist OK, found classes:',
              str(sorted(set(gt.values()))))

示例#4

0

显示文件

文件： validate_collection.py 项目： DomT4/gaia

def _load_yaml(path):
    """Parse the YAML file at *path* and return the resulting object."""
    # safe_load replaces the original yaml.load(...) call: without an explicit
    # Loader, yaml.load may construct arbitrary Python objects (and newer PyYAML
    # releases refuse the call). The `with` block closes the file handle that
    # the original left open.
    with open(path) as f:
        return yaml.safe_load(f.read())


def validate(basedir):
    """Check that *basedir* contains a consistent collection layout.

    Checks performed, in order (progress is printed along the way):

    * ``basedir`` and ``metadata/config.yaml`` exist and the config has a
      'version' entry;
    * ``audio/`` holds as many entries as the config's 'audioFormats';
    * each audio format has its folder under ``audio/``, defines a 'filelist',
      and each file named in that filelist exists;
    * each id of each ground-truth file listed under the optional
      'groundTruth' config entry is present in every format's filelist.

    Every ``print`` below is a single-argument call, so the text written is the
    same whether the module runs under Python 2 or Python 3.

    Args:
        basedir: root directory of the collection.

    Raises:
        Exception: on the first failed check, with a descriptive message.
        KeyError: if config.yaml has no 'audioFormats' entry.
    """
    # let's start with some basic check
    print('Checking basic directory layout...')
    if not exists(basedir):
        raise Exception('The specified base directory does not exist')

    # check required metadata files are there and that they're valid
    metadataDir = join(basedir, 'metadata')
    configFile = join(metadataDir, 'config.yaml')
    if not exists(configFile):
        raise Exception('config.yaml could not be found in the metadata/ folder')

    config = _load_yaml(configFile)
    if 'version' not in config:
        raise Exception('config.yaml doesn\'t have a version number')

    # check that the specified audioFormats correspond to the audio/ subfolders
    print('Checking available audio formats...')
    audioFormats = config['audioFormats']
    if not audioFormats:
        raise Exception('audioFormats not specified in config.yaml')

    audioFolders = [basename(f) for f in glob.glob(join(basedir, 'audio', '*'))]

    # only the counts are compared here; each format's folder is checked below
    if len(audioFolders) != len(audioFormats):
        raise Exception('Some audio folders are not described in the audioFormats section of the config.yaml')

    print('Found formats: %s' % str(audioFolders))

    # check the audio formats are valid, in particular that they have a valid filelist.
    # The parsed filelists are remembered so the groundtruth check can reuse them
    # instead of re-reading each file once per groundtruth.
    filelists = {}
    for fmt, desc in audioFormats.items():
        print("\nChecking format '%s':" % fmt)
        # TODO: at some point in the future we should also check for valid values in desc
        if not exists(join(basedir, 'audio', fmt)):
            raise Exception('%s is listed as an audio format, but doesn\'t appear in the audio/ folder' % fmt)

        if 'filelist' not in desc:
            raise Exception('Audio format "%s" does not define a filelist' % fmt)

        filelist = _load_yaml(join(metadataDir, desc['filelist']))
        print('  filelist OK, lists %d files' % len(filelist))
        filelists[fmt] = filelist

        for filename in filelist.values():
            fullpath = join(basedir, 'audio', fmt, filename)
            if not exists(fullpath):
                raise Exception('For format "%s": file "%s" appears in filelist, however there is no corresponding "%s"' % (fmt, filename, fullpath))

    # check that the groundtruth files do actually exist if they are specified
    print('\nChecking groundtruth files...')
    groundTruth = config.get('groundTruth', {})
    print('Found groundtruth files: %s' % str(list(groundTruth.keys())))
    for name, gtfile in groundTruth.items():
        print("\nChecking groundtruth '%s':" % name)
        gt = GroundTruth('')
        gt.load(join(metadataDir, gtfile))
        # check that the IDs used in the groundtruth files exist in all the filelists
        for afname in audioFormats:
            # membership is tested on the mapping itself (hash lookup) rather
            # than on a list of its keys
            flist = filelists[afname]
            for gid in gt:
                if gid not in flist:
                    raise Exception("ID '%s' is in GroundTruth '%s', but could not be found in filelist for audio format '%s'" % (gid, gtfile, afname))
        print('  gt filelist OK, found classes: %s' % str(sorted(set(gt.values()))))