y_39[y_48 == 43] = 37 y_39[y_48 == 16] = 37 y_39[y_48 == 9] = 37 return y_39 BATCH_SIZE = 300 NUM_EPOCHS = 61 # mnist = mnist.load_mnist_theano('mnist.pkl.gz') numpy_rng = np.random.RandomState(11111) theano_rng = RandomStreams(numpy_rng.randint( 2**30 )) # configuration for timit # read soem percent of data to be used for supervised training.... train_x, train_y = timit.readTIMIT('timit-mono-mfcc-train.pfile.gz', shared=False, listify=True, mapping=48, randomise=True, percent_data=0.10) # read almost the full data to be used for unsupervised training ..... train_x_full, train_y_full = timit.readTIMIT('timit-mono-mfcc-train.pfile.gz', shared=False, listify=True, mapping=48, randomise=True, percent_data=0.99) valid_x, valid_y = timit.readTIMIT('timit-mono-mfcc-valid.pfile.gz', shared=False, listify=False, mapping=48) test_x, test_y = timit.readTIMIT('timit-mono-mfcc-test.pfile.gz', shared=False, listify=False, mapping=48) # stack the list of data to make on single matrix of trianing data ... train_x_all = np.vstack(train_x) train_y_all = np.hstack(train_y) train_x_unsup = np.vstack(train_x_full) train_y_unsup = np.hstack(train_y_full) train_x_all, train_y_all = timit.shared_dataset((train_x_all, train_y_all)) train_x_unsup, train_y_unsup = timit.shared_dataset((train_x_unsup, train_y_unsup))
# Semi-supervised training script: reads hyperparameters from the command
# line, loads a labelled/unlabelled split of the TIMIT training file, then
# trains an SSDAE network on it.
BATCH_SIZE = 400

# Fixed-seed NumPy generator; the Theano random stream is seeded from a
# draw of that generator.
numpy_rng = np.random.RandomState(11111)
theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

# ---------------------------------------------------------------------------
# Command-line hyperparameters
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(
    description='setting up hyperparams by command line.')
parser.add_argument(
    '--percent', '-p', type=float, default=0.999,
    help='fraction of the training data, passed to timit.readTIMITSSL as '
         'percent_data (default: %(default)s)')
parser.add_argument(
    '--beta', '-b', type=int, default=200,
    help='integer passed to SSDAE as beta (default: %(default)s)')
parser.add_argument(
    '--alpha', '-a', type=int, default=3,
    help='integer passed to SSDAE as alpha (default: %(default)s)')
args = parser.parse_args()

alpha = args.alpha
beta = args.beta
percent = args.percent

# Single-argument print(...) with %-formatting emits the same text under
# both the Python 2 print statement and the Python 3 print function.
print("fraction of training data used is: %s" % percent)

# ---------------------------------------------------------------------------
# Data
# ---------------------------------------------------------------------------
# readTIMITSSL returns labelled features, their labels, and unlabelled
# features from the same training file.
train_x_lab, train_y_lab, train_x_unlab = timit.readTIMITSSL(
    'timit-mono-mfcc-train.pfile.gz',
    shared=False,
    listify=True,
    mapping=48,
    randomise=True,
    percent_data=percent,
)

# Only a randomised 20% of the validation file is read.
valid_x, valid_y = timit.readTIMIT(
    'timit-mono-mfcc-valid.pfile.gz',
    shared=False,
    listify=False,
    mapping=48,
    percent_data=0.20,
    randomise=True,
)

# NOTE(review): an earlier, disabled variant also loaded
# 'timit-mono-mfcc-test.pfile.gz' via timit.readTIMIT and passed
# test_x, test_y as extra arguments to trainSGDSupervised -- confirm that
# signature before re-enabling test-set evaluation.

# ---------------------------------------------------------------------------
# Model and training
# ---------------------------------------------------------------------------
# [10000, 10000] is presumably the hidden-layer sizes -- confirm in SSDAE.
network = SSDAE(
    numpy_rng,
    [10000, 10000],
    train_x_lab,
    train_y_lab,
    train_x_unlab,
    alpha=alpha,
    beta=beta,
)
network.trainSGD(epochs=[75, 2])
network.trainSGDSupervised(train_x_lab, train_y_lab, valid_x, valid_y)