def test_compile_features_for_word(self):
    """Check the output of ``durmodel_utils.compile_features_for_word``.

    A word with two local (feature-set, duration) items is compiled together
    with context feature sets at relative positions -2, -1 and +1.  The
    assertions below expect that:

    * each result item is a ``(features, duration)`` pair that keeps the
      local duration and the local features;
    * context features show up with a ``pos<offset>:`` prefix on their name.
    """
    context = {
        -2: {("foo", 1), ("bar", 1), ("dur", 0.377)},
        -1: {("fii", 1), ("dur", 0.553)},
        +1: {("</s>", 1)},
    }
    local_feature_and_dur_seq = [
        ({("bii", 1), ("boo", 1)}, 5),
        ({("buu", 1), ("bee", 1)}, 6),
    ]

    result = durmodel_utils.compile_features_for_word(
        context, local_feature_and_dur_seq)

    # The first item keeps its own duration and local features.
    self.assertEqual(result[0][1], 5)
    # assertIn replaces the deprecated ``assert_`` alias (removed in
    # Python 3.12) and reports the container contents on failure.
    self.assertIn(("pos-2:foo", 1), result[0][0])
    self.assertIn(("bii", 1), result[0][0])
    self.assertIn(("pos-1:dur", 0.553), result[0][0])
    self.assertIn(("pos-2:fii", 1), result[1][0])
    self.assertIn(("pos+2:</s>", 1), result[-2][0])
    self.assertIn(("pos+1:</s>", 1), result[-1][0])
    # NOTE(review): 5 is the duration of the first local item, so the last
    # item presumably sees it as its previous duration -- confirm against
    # compile_features_for_word.
    self.assertIn(
        ("pos-1:dur", durmodel_utils.dur_function(5)), result[-1][0])
def get_X_raw_utt(utt_features_and_durs, feature_dict):
    """Build the raw float16 feature matrix for one utterance.

    Args:
        utt_features_and_durs: sequence of ``(features, duration)`` pairs,
            where ``features`` is an iterable of ``(feature_name, value)``.
        feature_dict: mapping from feature name to column index.

    Returns:
        A ``numpy`` array of shape
        ``(len(utt_features_and_durs), len(feature_dict))`` with dtype
        ``float16``.  Column 0 of each row holds
        ``durmodel_utils.dur_function`` applied to the previous row's
        duration (5 for the first row); every feature found in
        ``feature_dict`` is written to its column, unknown features are
        ignored.
    """
    n_rows = len(utt_features_and_durs)
    n_cols = len(feature_dict)
    X_raw_utt = np.zeros((n_rows, n_cols), dtype=np.float16)

    # Duration assumed to precede the first item of the utterance.
    last_dur = 5
    for row, item in enumerate(utt_features_and_durs):
        item_features, item_dur = item
        X_raw_utt[row, 0] = durmodel_utils.dur_function(last_dur)
        for name, val in item_features:
            col = feature_dict.get(name, -1)
            if col < 0:
                # Feature is not in the dictionary: skip it.
                continue
            X_raw_utt[row, col] = val
        last_dur = item_dur

    return X_raw_utt
def _test_linear_lattice(self):
    """Exercise ``make_local`` + ``make_linear`` on a three-word sequence.

    The leading underscore keeps unittest from collecting this method, so
    it only runs when invoked explicitly.  Each entry of ``seqs`` is a
    ``(word_id, phone_id_frames)`` pair; ``word_id`` indexes ``words``.
    """
    words = ['<eps>', 'komm', 'Tanel']
    seqs = [
        # komm
        (1,
         '59_59_59_59_59_93_93_93_93_93_93_93_80_80_80_80_80_80_80_80_80_80_80_80_80_80_80_80_80_80_80'
         .split('_')),
        # SIL
        (0, '6_6_6_6_6_6_6'.split('_')),
        # Tanel
        (2,
         '135_135_135_135_135_135_135_135_13_13_13_13_13_13_13_13_13_13_85_85_85_85_85_85_85_25_25_25_25_25_25_25_25_25_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68'
         .split('_')),
    ]
    phone_map = {
        6: 'SIL',
        13: 'a',
        25: 'e',
        59: 'k',
        68: 'l',
        93: 'o',
        80: 'mm',
        85: 'n',
        135: 't',
    }

    features_and_durs = [
        durmodel_utils.make_local(word, seq, phone_map, words)
        for (word, seq) in seqs
    ]
    full_features_and_durs = durmodel_utils.make_linear(features_and_durs)

    self.assertEqual(8, len(full_features_and_durs))
    print(full_features_and_durs[1][0])
    # The first phone id (59) is repeated for 5 frames, which is presumably
    # why the second item is expected to carry a previous duration of 5.
    # assertIn replaces the deprecated ``assert_`` alias (removed in
    # Python 3.12).
    self.assertIn(
        'pos-1:dur=%f' % durmodel_utils.dur_function(5),
        full_features_and_durs[1][0])
    # Debug dump: one "<duration> <features...>" line per item.
    for (f, d) in full_features_and_durs:
        print('%d %s' % (d, ' '.join(f)))
def _test_linear_lattice(self):
    """Exercise ``make_local`` + ``make_linear`` on a three-word sequence.

    The leading underscore keeps unittest from collecting this method, so
    it only runs when invoked explicitly.  Each entry of ``seqs`` is a
    ``(word_id, phone_id_frames)`` pair; ``word_id`` indexes ``words``.
    """
    seqs = []
    words = ["<eps>", "komm", "Tanel"]
    # komm
    seqs.append((1, "59_59_59_59_59_93_93_93_93_93_93_93_80_80_80_80_80_80_80_80_80_80_80_80_80_80_80_80_80_80_80".split("_")))
    # SIL
    seqs.append((0, "6_6_6_6_6_6_6".split("_")))
    # Tanel
    seqs.append((2, "135_135_135_135_135_135_135_135_13_13_13_13_13_13_13_13_13_13_85_85_85_85_85_85_85_25_25_25_25_25_25_25_25_25_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68".split("_")))

    phone_map = {
        6: "SIL",
        13: "a",
        25: "e",
        59: "k",
        68: "l",
        93: "o",
        80: "mm",
        85: "n",
        135: "t",
    }

    features_and_durs = []
    for (word, seq) in seqs:
        features_and_durs.append(
            durmodel_utils.make_local(word, seq, phone_map, words))

    full_features_and_durs = durmodel_utils.make_linear(features_and_durs)
    self.assertEqual(8, len(full_features_and_durs))

    # Single-argument print(...) behaves the same on Python 2 and is valid
    # syntax on Python 3, unlike the bare print statement.
    print(full_features_and_durs[1][0])
    # The first phone id (59) is repeated for 5 frames, which is presumably
    # why the second item is expected to carry a previous duration of 5.
    # assertIn replaces the deprecated ``assert_`` alias (removed in
    # Python 3.12).
    self.assertIn(
        "pos-1:dur=%f" % durmodel_utils.dur_function(5),
        full_features_and_durs[1][0])
    # Debug dump: one "<duration> <features...>" line per item.
    for (f, d) in full_features_and_durs:
        print("%d %s" % (d, " ".join(f)))
def test_compile_features_for_word(self):
    """Check the output of ``durmodel_utils.compile_features_for_word``.

    Context feature sets are given for relative positions -2, -1 and +1,
    and the word itself has two local (feature-set, duration) items.  The
    test expects every result item to be a ``(features, duration)`` pair in
    which local features are kept as-is and context features appear with a
    ``pos<offset>:`` prefix on the feature name.
    """
    context = {
        -2: {('foo', 1), ('bar', 1), ('dur', 0.377)},
        -1: {('fii', 1), ('dur', 0.553)},
        +1: {('</s>', 1)},
    }
    local_feature_and_dur_seq = [
        ({('bii', 1), ('boo', 1)}, 5),
        ({('buu', 1), ('bee', 1)}, 6),
    ]

    result = durmodel_utils.compile_features_for_word(
        context, local_feature_and_dur_seq)

    # Duration of the first local item is passed through unchanged.
    self.assertEqual(result[0][1], 5)

    # assertIn replaces the deprecated ``assert_`` alias (removed in
    # Python 3.12) and shows the feature set when a check fails.
    self.assertIn(('pos-2:foo', 1), result[0][0])
    self.assertIn(('bii', 1), result[0][0])
    self.assertIn(('pos-1:dur', 0.553), result[0][0])
    self.assertIn(('pos-2:fii', 1), result[1][0])
    self.assertIn(('pos+2:</s>', 1), result[-2][0])
    self.assertIn(('pos+1:</s>', 1), result[-1][0])
    # NOTE(review): 5 is the first local item's duration; the last item
    # presumably treats it as the previous duration -- confirm.
    self.assertIn(
        ('pos-1:dur', durmodel_utils.dur_function(5)), result[-1][0])
def test_compile_features_for_word(self):
    """Verify how ``compile_features_for_word`` combines its inputs.

    Inputs are a context dict (relative position -> set of
    ``(name, value)`` features) and a list of local
    ``(feature_set, duration)`` items.  The expected result is a sequence
    of ``(features, duration)`` pairs where context features are renamed
    to ``pos<offset>:<name>`` and local features are kept unchanged.
    """
    context = {
        -2: {("foo", 1), ("bar", 1), ("dur", 0.377)},
        -1: {("fii", 1), ("dur", 0.553)},
        +1: {("</s>", 1)},
    }
    local_feature_and_dur_seq = [
        ({("bii", 1), ("boo", 1)}, 5),
        ({("buu", 1), ("bee", 1)}, 6),
    ]

    result = durmodel_utils.compile_features_for_word(
        context, local_feature_and_dur_seq)

    self.assertEqual(result[0][1], 5)

    # Membership checks use assertIn instead of the deprecated ``assert_``
    # alias (removed in Python 3.12); failures then print the feature set.
    expected_memberships = [
        (("pos-2:foo", 1), result[0][0]),
        (("bii", 1), result[0][0]),
        (("pos-1:dur", 0.553), result[0][0]),
        (("pos-2:fii", 1), result[1][0]),
        (("pos+2:</s>", 1), result[-2][0]),
        (("pos+1:</s>", 1), result[-1][0]),
        # NOTE(review): 5 is the first local item's duration, presumably
        # seen by the last item as its previous duration -- confirm.
        (("pos-1:dur", durmodel_utils.dur_function(5)), result[-1][0]),
    ]
    for feature, features in expected_memberships:
        self.assertIn(feature, features)
def _test_linear_lattice(self):
    """Run ``make_local``/``make_linear`` over a komm / SIL / Tanel sequence.

    Not collected by unittest because of the leading underscore; call it
    explicitly to run it.  Each ``seqs`` entry pairs an index into
    ``words`` with the per-frame phone ids of that word.
    """
    words = ["<eps>", "komm", "Tanel"]
    komm_frames = "59_59_59_59_59_93_93_93_93_93_93_93_80_80_80_80_80_80_80_80_80_80_80_80_80_80_80_80_80_80_80".split("_")
    sil_frames = "6_6_6_6_6_6_6".split("_")
    tanel_frames = "135_135_135_135_135_135_135_135_13_13_13_13_13_13_13_13_13_13_85_85_85_85_85_85_85_25_25_25_25_25_25_25_25_25_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68_68".split("_")
    seqs = [(1, komm_frames), (0, sil_frames), (2, tanel_frames)]

    phone_map = {
        6: "SIL",
        13: "a",
        25: "e",
        59: "k",
        68: "l",
        93: "o",
        80: "mm",
        85: "n",
        135: "t",
    }

    features_and_durs = [
        durmodel_utils.make_local(word, seq, phone_map, words)
        for (word, seq) in seqs
    ]
    full_features_and_durs = durmodel_utils.make_linear(features_and_durs)

    self.assertEqual(8, len(full_features_and_durs))
    # print(...) with a single argument is valid on Python 3 and prints the
    # same text on Python 2, unlike the bare print statement.
    print(full_features_and_durs[1][0])
    # The first phone id (59) spans 5 frames, presumably the source of the
    # expected previous duration.  assertIn replaces the deprecated
    # ``assert_`` alias (removed in Python 3.12).
    self.assertIn(
        "pos-1:dur=%f" % durmodel_utils.dur_function(5),
        full_features_and_durs[1][0])
    # Debug dump: one "<duration> <features...>" line per item.
    for (f, d) in full_features_and_durs:
        print("%d %s" % (d, " ".join(f)))
# Dump the feature names, one per line, in feature_dict iteration order.
with codecs.open(args.write_features_filename, 'w', encoding="UTF-8") as f:
    for feature in feature_dict:
        # f.write works on both Python 2 and 3; the former
        # ``print >> f, feature`` form is Python 2-only syntax.
        f.write("%s\n" % feature)

# Build one float16 matrix per utterance: a row per item, a column per
# feature_dict entry.
raw_data_X = []
for utt_features_and_durs in all_features_and_durs:
    utt_raw_data_X = np.zeros(
        (len(utt_features_and_durs), len(feature_dict)), dtype=np.float16)
    # Duration assumed to precede the first item of each utterance.
    prev_duration = 5
    for (i, (_features, _dur)) in enumerate(utt_features_and_durs):
        # Column 0 carries the transformed duration of the previous item.
        utt_raw_data_X[i, 0] = durmodel_utils.dur_function(prev_duration)
        for (feature_name, value) in _features:
            feature_id = feature_dict.get(feature_name, -1)
            # Features missing from feature_dict are ignored.
            if feature_id >= 0:
                utt_raw_data_X[i, feature_id] = value
        prev_duration = _dur
    raw_data_X.append(utt_raw_data_X)

# Per-utterance int16 target matrices; column 0 holds the item durations.
raw_data_y = []
for utt_features_and_durs in all_features_and_durs:
    utt_raw_data_y = np.zeros((len(utt_features_and_durs), 2),
                              dtype=np.int16)
    utt_raw_data_y[:, 0] = np.array(
        [d for (_, d) in utt_features_and_durs])