示例#1
0
#directory that receives the test features for this feature configuration
test_featdir = os.path.join(database_cfg['test_dir'], feat_cfg['name'])

prepare_data.prepare_data(
    datadir=database_cfg['test_data'],
    featdir=test_featdir,
    conf=feat_cfg)

#NOTE: print(...) with a single parenthesised string prints the same text
#under Python 2 and Python 3
print('------- computing cmvn stats ----------')
prepare_data.compute_cmvn(featdir=test_featdir)

#write input_dim (set earlier in the script) to the 'dim' file that sits in
#the test feature directory
dim_path = os.path.join(test_featdir, 'dim')
with open(dim_path, 'w') as fid:
    fid.write(str(input_dim))

#shuffle the training data on disk
print('------- shuffling examples ----------')
train_featdir = os.path.join(database_cfg['train_dir'], feat_cfg['name'])
prepare_data.shuffle_examples(train_featdir)

#create the text normalizer
normalizer = normalizer_factory.factory(database_cfg['normalizer'])

print('------- normalizing training targets -----------')
sourcefile = database_cfg['traintext']
#the targets file is opened for writing here and is not closed in this part
#of the script
targets_path = os.path.join(database_cfg['train_dir'], 'targets')
target_fid = open(targets_path, 'w')

#read the textfile line by line, normalize and write in target file
with open(sourcefile) as fid:
    for line in fid.readlines():
        splitline = line.strip().split(' ')
        utt_id = splitline[0]
        trans = ' '.join(splitline[1:])
        normalized = normalizer(trans)
示例#2
0
文件: featprep.py 项目: uniq10/nabu
    print '------- computing cmvn stats ----------'
    prepare_data.compute_cmvn(featdir=database_cfg['dev_features'] + '/' +
                              feat_cfg['name'])

#compute the features of the test set for testing
#NOTE: print(...) with a single parenthesised string prints the same text
#under Python 2 and Python 3
print('------- computing testing features ----------')
#directory holding the test features for this feature configuration
test_featdir = database_cfg['test_features'] + '/' + feat_cfg['name']
prepare_data.prepare_data(datadir=database_cfg['test_data'],
                          featdir=test_featdir,
                          conf=feat_cfg)

print('------- computing cmvn stats ----------')
prepare_data.compute_cmvn(featdir=test_featdir)

#get the feature dim: the size of axis 1 of the features returned by the
#first read_next_utt() call on the training feats.scp
train_featdir = database_cfg['train_features'] + '/' + feat_cfg['name']
reader = ark.ArkReader(train_featdir + '/feats.scp')
_, features, _ = reader.read_next_utt()
input_dim = features.shape[1]

#store the dimension next to the training features; the with block closes
#the file even when the write raises
with open(train_featdir + '/dim', 'w') as fid:
    fid.write(str(input_dim))

#shuffle the training data on disk
print('------- shuffling examples ----------')
prepare_data.shuffle_examples(train_featdir)