示例#1
0
    def __init__(self, conf):
        '''
        FeatureComputer constructor

        Args:
            conf: the feature configuration
        '''

        #default conf file
        default = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               'defaults',
                               type(self).__name__.lower() + '.cfg')

        #apply the defaults
        if os.path.exists(default):
            default_conf(conf, default)

        self.conf = dict(conf.items('features'))
示例#2
0
文件: coder.py 项目: vrenkens/assist
    def __init__(self, structure, conf):
        '''Coder constructor

        Args:
            structure: a Structure object
        '''

        #default conf file
        default = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               'defaults',
                               type(self).__name__.lower() + '.cfg')

        #apply the defaults
        if os.path.exists(default):
            default_conf(conf, default)

        self.structure = structure
        self.conf = dict(conf.items('coder'))
示例#3
0
    def __init__(self, conf, coder, expdir):
        '''model constructor

        Args:
            conf: the model configuration as as dictionary of strings
            coder: an object that encodes the tasks
            expdir: the experiments directory
        '''

        #default conf file
        default = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               'defaults',
                               type(self).__name__.lower() + '.cfg')

        #apply the defaults
        if os.path.exists(default):
            default_conf(conf, default)

        self.conf = dict(conf.items('acquisition'))
        self.coder = coder
        self.expdir = expdir
示例#4
0
    def __init__(self, structure, conf):
        '''Coder constructor

        Args:
            structure: a Structure object
        '''

        self.structure = structure
        # TODO: Remove configargparse dep and replace with json/yaml files and plain dict
        if type(conf) != dict:
            #default conf file
            default = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   'defaults',
                                   type(self).__name__.lower() + '.cfg')

            #apply the defaults
            if os.path.exists(default):
                default_conf(conf, default)

            self.conf = dict(conf.items('coder'))

        else:
            self.conf = conf
示例#5
0
def main(expdir, recipe, computing):
    '''main function'''

    overwrite = False
    if os.path.isdir(expdir):
        text = ''
        while text not in ('o', 'r'):
            text = raw_input('%s already exists, do you want to '
                             'resume experiment (r) or overwrite (o) '
                             '(respond with o or r)' % expdir)
        if text == 'o':
            overwrite = True

    else:
        #create the experiments directory
        os.makedirs(expdir)

    #copy the config files
    if overwrite:
        shutil.copyfile(os.path.join(recipe, 'acquisition.cfg'),
                        os.path.join(expdir, 'acquisition.cfg'))
    else:
        tools.safecopy(os.path.join(recipe, 'acquisition.cfg'),
                       os.path.join(expdir, 'acquisition.cfg'))

    shutil.copyfile(os.path.join(recipe, 'coder.cfg'),
                    os.path.join(expdir, 'coder.cfg'))
    shutil.copyfile(os.path.join(recipe, 'structure.xml'),
                    os.path.join(expdir, 'structure.xml'))

    shutil.copyfile(os.path.join(recipe, 'coder.cfg'),
                    os.path.join(expdir, 'coder.cfg'))
    shutil.copyfile(os.path.join(recipe, 'structure.xml'),
                    os.path.join(expdir, 'structure.xml'))
    shutil.copyfile(os.path.join(recipe, 'database.cfg'),
                    os.path.join(expdir, 'database.cfg'))
    shutil.copyfile(os.path.join(recipe, 'cross_validation_ppall.cfg'),
                    os.path.join(expdir, 'cross_validation_ppall.cfg'))

    acquisitionconf = ConfigParser()
    acquisitionconf.read(os.path.join(recipe, 'acquisition.cfg'))
    modelname = acquisitionconf.get('acquisition', 'name')

    shutil.copyfile(
        os.path.join(os.getcwd(), 'assist', 'acquisition', 'defaults',
                     modelname + '.cfg'),
        os.path.join(expdir, modelname + '.cfg'))

    #read the cross_validation config file
    expconf = ConfigParser()
    expconf.read(os.path.join(recipe, 'cross_validation.cfg'))

    #default conf file
    default = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           'defaults', 'cross_validation.cfg')

    #apply the defaults
    if os.path.exists(default):
        tools.default_conf(expconf, default)

    expconf = dict(expconf.items('cross_validation'))

    #read the data config file
    if not os.path.exists(os.path.join(recipe, 'database.cfg')):
        raise Exception('cannot find database.cfg in %s' % recipe)

    dataconf = ConfigParser()
    dataconf.read(os.path.join(recipe, 'database.cfg'))

    #read the coder config file
    coderconf = ConfigParser()
    coderconf.read(os.path.join(expdir, 'coder.cfg'))

    # filter out all speakers with less than 100 examples
    # (in the FluentSpeechCommands dataset (~20%))
    bad_spks = []
    if os.path.exists(os.path.join(recipe, 'FS_linecounts.txt')):
        for l in open(os.path.join(recipe, 'FS_linecounts.txt')):
            splitline = l.strip().split(' ')
            if int(splitline[1]) < 100:
                bad_spks.append(splitline[0])

    print bad_spks

    for speaker in dataconf.sections():
        if speaker in bad_spks:
            continue

        print 'speaker: %s' % (speaker)

        #create the speaker directory
        if os.path.isdir(os.path.join(expdir, speaker)):
            if overwrite:
                shutil.rmtree(os.path.join(expdir, speaker))
                os.makedirs(os.path.join(expdir, speaker))
        else:
            os.makedirs(os.path.join(expdir, speaker))

        #create a task structure file
        structure = Structure(os.path.join(expdir, 'structure.xml'))

        #create a coder
        coder = coder_factory.factory(coderconf.get('coder',
                                                    'name'))(structure,
                                                             coderconf)

        #read and code all the tasks
        labelvecs = []
        names = []
        taskstrings = dict()
        for line in open(dataconf.get(speaker, 'tasks')):
            splitline = line.strip().split(' ')
            name = speaker + '_' + splitline[0]
            names.append(name)
            taskstring = ' '.join(splitline[1:])
            taskstrings[name] = taskstring
            task = read_task(taskstring)
            labelvecs.append(coder.encode(task))

        #devide the data into blocks
        blocksfile = os.path.join(expdir, speaker, 'blocks.pkl')
        if os.path.exists(blocksfile):
            with open(blocksfile, 'rb') as fid:
                blocks = pickle.load(fid)
        else:
            blocks = make_blocks(np.array(labelvecs), expconf,
                                 dataconf.get(speaker, 'features'))
            with open(blocksfile, 'wb') as fid:
                pickle.dump(blocks, fid)

        #create train-testsets for all experiments

        #seed the random number generator
        random.seed(3105)
        trainids = [None] * (len(blocks) - 1)
        testids = [None] * (len(blocks) - 1)
        for b in range(len(blocks) - 1):
            trainids[b] = [None] * int(expconf['numexp'])
            testids[b] = [None] * int(expconf['numexp'])
            for e in range(int(expconf['numexp'])):
                trainids[b][e] = list(
                    itertools.chain.from_iterable(random.sample(blocks,
                                                                b + 1)))
                testids[b][e] = [
                    x for x in range(len(names)) if x not in trainids[b][e]
                ]

        #read the feature files
        features = dict()
        for l in open(os.path.join(dataconf.get(speaker, 'features'),
                                   'feats')):
            splitline = l.strip().split(' ')
            featname = speaker + '_' + splitline[0]
            features[featname] = ' '.join(splitline[1:])

        #create an expdir for each experiment
        b = int(expconf['startblocks']) - 1

        while True:
            for e in range(int(expconf['numexp'])):

                print '     train blocks: %d, experiment %s' % (b + 1, e)

                #creat the directory
                subexpdir = os.path.join(expdir, speaker,
                                         '%dblocks_exp%d' % (b + 1, e))

                if os.path.exists(os.path.join(subexpdir, 'f1')):
                    continue

                if not os.path.isdir(subexpdir):
                    os.makedirs(subexpdir)

                #create pointers to the config files
                tools.symlink(os.path.join(expdir, 'acquisition.cfg'),
                              os.path.join(subexpdir, 'acquisition.cfg'))
                tools.symlink(os.path.join(expdir, 'coder.cfg'),
                              os.path.join(subexpdir, 'coder.cfg'))
                tools.symlink(os.path.join(expdir, 'structure.xml'),
                              os.path.join(subexpdir, 'structure.xml'))
                tools.symlink(os.path.join(expdir, 'database.cfg'),
                              os.path.join(subexpdir, 'database.cfg'))

                if not os.path.exists(os.path.join(subexpdir, 'trainfeats')):
                    trainutts = [names[i] for i in trainids[b][e]]
                    print 'number of examples: %d' % len(trainutts)
                    testutts = [names[i] for i in testids[b][e]]

                    #create the train and test sets
                    tools.writefile(os.path.join(subexpdir, 'trainfeats'),
                                    {utt: features[utt]
                                     for utt in trainutts})
                    tools.writefile(
                        os.path.join(subexpdir, 'traintasks'),
                        {utt: taskstrings[utt]
                         for utt in trainutts})
                    tools.writefile(os.path.join(subexpdir, 'testfeats'),
                                    {utt: features[utt]
                                     for utt in testutts})
                    tools.writefile(
                        os.path.join(subexpdir, 'testtasks'),
                        {utt: taskstrings[utt]
                         for utt in testutts})

                if computing in ('condor', 'condor_gpu'):
                    #create the outputs directory
                    if not os.path.isdir(os.path.join(subexpdir, 'outputs')):
                        os.makedirs(os.path.join(subexpdir, 'outputs'))

                    if computing == 'condor_gpu':
                        jobfile = 'run_script_GPU.job'
                    else:
                        jobfile = 'run_script.job'

                    #only submit the job if it not running yet
                    in_queue = os.popen(
                        'if condor_q -nobatch -wide | grep -q %s; '
                        'then echo true; else echo false; fi' %
                        subexpdir).read().strip() == 'true'

                    #submit the condor job
                    if not in_queue:
                        os.system('condor_submit expdir=%s script=train_test'
                                  ' assist/condor/%s' % (subexpdir, jobfile))
                else:
                    train_test.main(subexpdir)

            newb = (b + 1) * int(expconf['scale']) + int(
                expconf['increment']) - 1
            newb = min(newb, len(blocks) - 2)
            if b == newb:
                break
            else:
                b = newb
def main(expdir, recipe, computing):
    '''main function'''

    overwrite = False
    if os.path.isdir(expdir):
        text = ''
        while text not in ('o', 'r'):
            text = raw_input('%s already exists, do you want to '
                             'resume experiment (r) or overwrite (o) '
                             '(respond with o or r)' % expdir)
        if text == 'o':
            overwrite = True

    else:
        #create the experiments directory
        os.makedirs(expdir)

    #copy the config files
    if overwrite:
        shutil.copyfile(os.path.join(recipe, 'acquisition.cfg'),
                        os.path.join(expdir, 'acquisition.cfg'))
    else:
        tools.safecopy(os.path.join(recipe, 'acquisition.cfg'),
                       os.path.join(expdir, 'acquisition.cfg'))

    shutil.copyfile(os.path.join(recipe, 'coder.cfg'),
                    os.path.join(expdir, 'coder.cfg'))
    shutil.copyfile(os.path.join(recipe, 'structure.xml'),
                    os.path.join(expdir, 'structure.xml'))
    shutil.copyfile(os.path.join(recipe, 'database.cfg'),
                    os.path.join(expdir, 'database.cfg'))
    shutil.copyfile(os.path.join(recipe, 'cross_validation_ppall.cfg'),
                    os.path.join(expdir, 'cross_validation_ppall.cfg'))

    acquisitionconf = ConfigParser()
    acquisitionconf.read(os.path.join(recipe, 'acquisition.cfg'))
    modelname = acquisitionconf.get('acquisition', 'name')
    shutil.copyfile(
        os.path.join(os.getcwd(), 'assist', 'acquisition', 'defaults',
                     modelname + '.cfg'),
        os.path.join(expdir, modelname + '.cfg'))

    #read the cross_validation config file
    expconf = ConfigParser()
    expconf.read(os.path.join(recipe, 'cross_validation_ppall.cfg'))

    #default conf file
    default = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           'defaults', 'cross_validation_ppall.cfg')

    #apply the defaults
    if os.path.exists(default):
        tools.default_conf(expconf, default)

    expconf = dict(expconf.items('cross_validation_ppall'))

    #read the data config file
    if not os.path.exists(os.path.join(recipe, 'database.cfg')):
        raise Exception('cannot find database.cfg in %s' % recipe)

    dataconf = ConfigParser()
    dataconf.read(os.path.join(recipe, 'database.cfg'))

    #read the coder config file
    coderconf = ConfigParser()
    coderconf.read(os.path.join(expdir, 'coder.cfg'))

    # for word specific thresholds (not used anymore)
    #if os.path.isfile(os.path.join(recipe,'word_thresholds.pkl')):
    #    print('File with wordthresholds found in recipe')
    #    shutil.copyfile(os.path.join(recipe, 'word_thresholds.pkl'),
    #        os.path.join(expdir, 'word_thresholds.pkl'))
    #    thresholdsarepresent = True
    #else:
    #    print('No file found with wordthresholds, using a fixed one')
    #    thresholdsarepresent = False

    labelvecs = []
    names = []
    taskstrings = dict()
    features = dict()

    print 'Searching for all speakers...'

    for speaker in dataconf.sections():

        print '     speaker: %s' % (speaker)

        #create a task structure file
        structure = Structure(os.path.join(expdir, 'structure.xml'))
        #create a coder
        coder = coder_factory.factory(coderconf.get('coder',
                                                    'name'))(structure,
                                                             coderconf)
        # typesplit_coder.py line 51 to see all labels and corresponding output capsule numbers

        #read and code all the tasks
        for line in open(
                dataconf.get(speaker, 'tasks')
        ):  #'recording1_Voice_10 <move_rel direction="forward" distance="little" throttle="fast" />'
            splitline = line.strip().split(' ')
            name = speaker + '_' + splitline[0]  #'recording1_Voice_10'
            names.append(name)
            taskstring = ' '.join(
                splitline[1:]
            )  #'<move_rel_direction="forwqrd" distance="little" throttle="fast"/>'
            taskstrings[name] = taskstring
            task = read_task(taskstring)
            labelvecs.append(coder.encode(task))

        # read the feature files
        for l in open(os.path.join(dataconf.get(speaker, 'features'),
                                   'feats')):
            splitline = l.strip().split(
                ' '
            )  #['recording1_Voice_10', '/esat/spchtemp/scratch/r0580562/databases/grabo_features/pp2/recording1_Voice_10.npy']
            featname = speaker + '_' + splitline[0]
            features[featname] = ' '.join(splitline[1:])

    print 'Devide data into blocks...'
    #devide the data into blocks, look for existing blocksfile in recipe because takes a very long time to make!!!
    blocksfile = os.path.join(recipe, 'blocks.pkl')
    if os.path.exists(blocksfile):
        print 'Loading found blocks file (check if number of blocks is still the same)'
        with open(blocksfile, 'rb') as fid:
            blocks = pickle.load(fid)
    else:
        print 'No blocksfile found in  recipe, making new one'
        blocks = make_blocks(
            np.array(labelvecs), expconf, expdir
        )  #massive list, matrix of [[..,..,..,..],[..,...    .]]  numbers between 1-350 approx
        with open(blocksfile, 'wb') as fid:
            pickle.dump(blocks, fid)

    print 'Shuffle speakers...'

    # look for existing train and test sets and load them in ('saved_ids' in recipe), because takes a very long time to make!!!

    sets_properties = {}
    if os.path.isdir(os.path.join(recipe, 'saved_ids')):
        saved_ids = ConfigParser()
        saved_ids.read(
            os.path.join(recipe, 'saved_ids', 'cross_validation_ppall.cfg'))
        sets_properties = dict(saved_ids.items('cross_validation_ppall'))
    else:
        sets_properties['numblocks'] = 0
        sets_properties['numexp'] = 0

    if (sets_properties['numblocks']
            == expconf['numblocks']) and (sets_properties['numexp']
                                          == expconf['numexp']):
        print '     Loading found test recipe'
        trainids_saved = os.path.join(recipe, 'saved_ids', 'trainids.pkl')
        with open(trainids_saved, 'rb') as fid:
            trainids = pickle.load(fid)
        testids_saved = os.path.join(recipe, 'saved_ids', 'testids.pkl')
        with open(testids_saved, 'rb') as fid:
            testids = pickle.load(fid)
    else:
        print '     No saved test and train sets found with same crossvalidation configuration in the recipe'
        # seed the random number generator
        random.seed(3105)
        trainids = [None] * (len(blocks) - 1)  #len(blocks)=15
        testids = [None] * (len(blocks) - 1)
        print '     Number of blocks: %d' % (len(blocks))
        b = 0
        while b < (len(blocks) - 1):
            #for b in range(len(blocks)-1):
            print '         block %d' % b
            trainids[b] = [None] * int(expconf['numexp'])
            testids[b] = [None] * int(expconf['numexp'])
            for e in range(int(expconf['numexp'])):
                trainids[b][e] = list(
                    itertools.chain.from_iterable(random.sample(blocks,
                                                                b + 1)))
                testids[b][e] = [
                    x for x in range(len(names)) if x not in trainids[b][e]
                ]
            #scale factor to use more smaller blocks and less bigger blocks (for the curve, it saturates)
            newb = int(
                np.floor((b + 1) * float(expconf['scale']) +
                         int(expconf['increment']) - 1))
            newb = min(newb, len(blocks) - 2)
            if b == newb:
                break
            else:
                b = newb

        os.makedirs(os.path.join(expdir, 'saved_ids'))
        trainids_saved = os.path.join(expdir, 'saved_ids', 'trainids.pkl')
        testids_saved = os.path.join(expdir, 'saved_ids', 'testids.pkl')
        with open(trainids_saved, 'wb') as fid:
            pickle.dump(trainids, fid)
        with open(testids_saved, 'wb') as fid:
            pickle.dump(testids, fid)
        shutil.copyfile(
            os.path.join(recipe, 'cross_validation_ppall.cfg'),
            os.path.join(expdir, 'saved_ids', 'cross_validation_ppall.cfg'))

    #create an expdir for each experiment
    b = int(expconf['startblocks']) - 1  #0

    print 'Launch the experiments...'
    while True:
        for e in range(int(expconf['numexp'])):

            print '     train blocks: %d, experiment %s' % (b + 1, e)

            #creat the directory
            subexpdir = os.path.join(expdir, '%dblocks_exp%d' % (b + 1, e))

            if os.path.exists(os.path.join(subexpdir, 'f1')):
                continue

            if not os.path.isdir(subexpdir):
                os.makedirs(subexpdir)

            #create pointers to the config files
            tools.symlink(os.path.join(expdir, 'acquisition.cfg'),
                          os.path.join(subexpdir, 'acquisition.cfg'))
            tools.symlink(os.path.join(expdir, 'coder.cfg'),
                          os.path.join(subexpdir, 'coder.cfg'))
            tools.symlink(os.path.join(expdir, 'structure.xml'),
                          os.path.join(subexpdir, 'structure.xml'))
            tools.symlink(os.path.join(expdir, 'database.cfg'),
                          os.path.join(subexpdir, 'database.cfg'))
            #if thresholdsarepresent:
            #    tools.symlink(os.path.join(expdir, 'word_thresholds.pkl'),
            #                  os.path.join(subexpdir, 'word_thresholds.pkl'))

            if not os.path.exists(os.path.join(subexpdir, 'trainfeats')):
                trainutts = [names[i] for i in trainids[b][e]]
                print 'number of examples: %d' % len(trainutts)
                testutts = [names[i] for i in testids[b][e]]

                #create the train and test sets
                tools.writefile(os.path.join(subexpdir, 'trainfeats'),
                                {utt: features[utt]
                                 for utt in trainutts})
                tools.writefile(os.path.join(subexpdir, 'traintasks'),
                                {utt: taskstrings[utt]
                                 for utt in trainutts})
                tools.writefile(os.path.join(subexpdir, 'testfeats'),
                                {utt: features[utt]
                                 for utt in testutts})
                tools.writefile(os.path.join(subexpdir, 'testtasks'),
                                {utt: taskstrings[utt]
                                 for utt in testutts})

            if computing in ('condor', 'condor_gpu'):
                #create the outputs directory
                if not os.path.isdir(os.path.join(subexpdir, 'outputs')):
                    os.makedirs(os.path.join(subexpdir, 'outputs'))

                if computing == 'condor_gpu':
                    jobfile = 'run_script_GPU.job'
                else:
                    jobfile = 'run_script.job'

                #only submit the job if it not running yet
                in_queue = os.popen('if condor_q -nobatch -wide | grep -q %s; '
                                    'then echo true; else echo false; fi' %
                                    subexpdir).read().strip() == 'true'

                #submit the condor job
                if not in_queue:
                    os.system('condor_submit expdir=%s script=train_test'
                              ' assist/condor/%s' % (subexpdir, jobfile))
            else:
                train_test.main(subexpdir)

        newb = int(
            np.floor((b + 1) * float(expconf['scale']) +
                     int(expconf['increment']) - 1))
        newb = min(newb, len(blocks) - 2)
        if b == newb:
            break
        else:
            b = newb