Пример #1
0
# Supervised subset: read a randomised 10% of the training pfile.
train_x, train_y = timit.readTIMIT(
    'timit-mono-mfcc-train.pfile.gz',
    shared=False, listify=True, mapping=48, randomise=True, percent_data=0.10)

# Unsupervised subset: read (almost) all of the same training pfile (99%).
train_x_full, train_y_full = timit.readTIMIT(
    'timit-mono-mfcc-train.pfile.gz',
    shared=False, listify=True, mapping=48, randomise=True, percent_data=0.99)

# Validation and test sets are read with listify=False.
valid_x, valid_y = timit.readTIMIT(
    'timit-mono-mfcc-valid.pfile.gz', shared=False, listify=False, mapping=48)
test_x, test_y = timit.readTIMIT(
    'timit-mono-mfcc-test.pfile.gz', shared=False, listify=False, mapping=48)

# Stack each listified training set into one feature matrix / one label
# vector and hand the pair to timit.shared_dataset.
# NOTE: this must happen before train_x / train_y are rebound below.
train_x_all, train_y_all = timit.shared_dataset(
    (np.vstack(train_x), np.hstack(train_y)))
train_x_unsup, train_y_unsup = timit.shared_dataset(
    (np.vstack(train_x_full), np.hstack(train_y_full)))

# Apply map_y_48 to the labels: element by element for the listified
# training labels, directly for the validation and test labels.
train_y = map(map_y_48, train_y)
valid_y = map_y_48(valid_y)
test_y = map_y_48(test_y)

# Partitioned (train) and whole (valid/test) shared versions of the data.
train_x, train_y = timit.make_shared_partitions(train_x, train_y)
valid_x, valid_y = timit.shared_dataset((valid_x, valid_y))
test_x, test_y = timit.shared_dataset((test_x, test_y))

# The unsupervised features are the ones exposed as train_set_x; print the
# row counts of the supervised and unsupervised matrices for comparison.
train_set_x = train_x_unsup
print(train_x_all.get_value().shape[0])
print(train_set_x.get_value().shape[0])

# Disabled network configurations kept for reference:
# nn_ae = DNN(numpy_rng, [5096, 5096], 429, 144)
# nn_ae = DNN(numpy_rng, [6000, 6000], 429, 39)
Пример #2
0
# Disabled network configurations kept for reference:
# nn = DNN(numpy_rng, [6096, 6096], 429, 144)
# nn = DNN(numpy_rng, [10096], 1320, 48)
# Active network: one hidden layer of `hu` units. 429 and 48 are passed as the
# remaining DNN size arguments (presumably input and output sizes -- confirm
# against the DNN constructor).
nn = DNN(numpy_rng, [hu], 429, 48)
MODE = 'usevalid'

# Read a randomised `percent` share of the training pfile, plus the full
# validation and test pfiles.
train_x, train_y = timit.readTIMIT('timit-mono-mfcc-train.pfile.gz', shared=False, listify=True, mapping=48, percent_data=percent, randomise=True)
valid_x, valid_y = timit.readTIMIT('timit-mono-mfcc-valid.pfile.gz', shared=False, listify=False, mapping=48)
test_x, test_y = timit.readTIMIT('timit-mono-mfcc-test.pfile.gz', shared=False, listify=False, mapping=48)

# Label remapping through map_y_48 is disabled in this script:
#train_y = map(lambda x: map_y_48(x), train_y)
#valid_y, test_y = map_y_48(valid_y), map_y_48(test_y)

# This mode uses the standard validation set.
if MODE == 'usevalid':
	train_x, train_y = timit.make_shared_partitions(train_x, train_y)
	valid_x, valid_y = timit.shared_dataset((valid_x, valid_y))
	test_x, test_y = timit.shared_dataset((test_x, test_y))
	num_partitions = len(train_x)
	print(num_partitions)

	# Run bsgd once per training partition, always against the same
	# validation and test sets.
	for train_set_x, train_set_y in zip(train_x, train_y):
		# Named `datasets`, not `timit`: rebinding `timit` here would replace
		# the data-loading module with a list and break any later timit.* call.
		datasets = [(train_set_x, train_set_y), (valid_x, valid_y), (test_x, test_y)]
		bsgd(nn, datasets, epochs=80, lr=0.008)

else:
	print(len(train_x))
	num_partitions = len(train_x)