# Run feature preparation for the test set: the input directory is
# database_cfg['test_data'] and the output directory is
# <test_dir>/<feature name>.
prepare_data.prepare_data(
    datadir=database_cfg['test_data'],
    featdir=os.path.join(database_cfg['test_dir'], feat_cfg['name']),
    conf=feat_cfg)

print '------- computing cmvn stats ----------'
# The cmvn statistics are computed on the same test feature directory that
# was passed to prepare_data above.
prepare_data.compute_cmvn(
    featdir=os.path.join(database_cfg['test_dir'], feat_cfg['name']))

# Store the feature dimension as text in a 'dim' file inside the test
# feature directory.
# NOTE(review): input_dim is not assigned in this part of the script; it is
# expected to have been set earlier -- confirm.
with open(os.path.join(database_cfg['test_dir'], feat_cfg['name'], 'dim'),
          'w') as fid:
    fid.write(str(input_dim))

# shuffle the training data on disk
print '------- shuffling examples ----------'
prepare_data.shuffle_examples(
    os.path.join(database_cfg['train_dir'], feat_cfg['name']))

# create the text normalizer selected by database_cfg['normalizer']
normalizer = normalizer_factory.factory(database_cfg['normalizer'])

print '------- normalizing training targets -----------'
sourcefile = database_cfg['traintext']
# Output file for the normalized targets: <train_dir>/targets.
# NOTE(review): opened without a `with` block, so it needs an explicit
# close once writing is finished -- confirm that happens further down.
target_fid = open(os.path.join(database_cfg['train_dir'], 'targets'), 'w')

# read the text file line by line and normalize each transcription
# (the write to target_fid is expected to follow -- TODO confirm)
with open(sourcefile) as fid:
    for line in fid.readlines():
        # the first space-separated field is the utterance id, the
        # remaining fields form the transcription
        splitline = line.strip().split(' ')
        utt_id = splitline[0]
        trans = ' '.join(splitline[1:])
        normalized = normalizer(trans)
# NOTE: the print calls use the parenthesised single-argument form, which
# prints exactly the same text under the Python 2 print statement and is also
# valid Python 3 syntax.

# Feature directories are built with plain '/' concatenation so the resulting
# path strings are unchanged; each one is computed once and reused below.
dev_featdir = database_cfg['dev_features'] + '/' + feat_cfg['name']
test_featdir = database_cfg['test_features'] + '/' + feat_cfg['name']
train_featdir = database_cfg['train_features'] + '/' + feat_cfg['name']

print('------- computing cmvn stats ----------')
prepare_data.compute_cmvn(featdir=dev_featdir)

# compute the features of the test set for testing
print('------- computing testing features ----------')
prepare_data.prepare_data(
    datadir=database_cfg['test_data'],
    featdir=test_featdir,
    conf=feat_cfg)

print('------- computing cmvn stats ----------')
prepare_data.compute_cmvn(featdir=test_featdir)

# get the feature dim: read the first utterance listed in the training
# feats.scp and take the size of its second axis
reader = ark.ArkReader(train_featdir + '/feats.scp')
_, features, _ = reader.read_next_utt()
input_dim = features.shape[1]

# write the dimension next to the training features; the with-block closes
# the file even when the write raises
with open(train_featdir + '/dim', 'w') as fid:
    fid.write(str(input_dim))

# shuffle the training data on disk
print('------- shuffling examples ----------')
prepare_data.shuffle_examples(train_featdir)