Example #1
def main_theano_sign_lang_var_len_adadelta():
    """
	:description: this trains a model on the sign language data as well, but accounts for variable length sequences and processes batches.
	"""
    print('loading data...')
    n_input_at_each_timestep = 10
    n_classes = 97  # 97 or 98 classes depending on whether the labels are 0-indexed; adjust if needed

    X, y = sign_lang.load_data_from_aggregate_file()
    X, masks = sign_lang.pad_data_to_max_sample_length(X)
    X = X.astype(theano.config.floatX)
    masks = masks.astype(theano.config.floatX)
    X = np.swapaxes(X, 0, 1)
    masks = np.swapaxes(masks, 0, 1)

    split_idx = int(.8 * X.shape[1])

    X = theano.shared(np.asarray(X, dtype=theano.config.floatX), borrow=True)
    masks = theano.shared(np.asarray(masks, dtype=theano.config.floatX),
                          borrow=True)
    y = theano.shared(np.asarray(y, dtype='int64'), borrow=True)  # targets must be int64 to match T.lvector

    trainset_masks = masks[:, :split_idx, :]
    testset_masks = masks[:, split_idx:, :]

    trainset_X, trainset_y = X[:, :split_idx, :], y[:split_idx]
    testset_X, testset_y = X[:, split_idx:, :], y[split_idx:]

    index = T.lscalar()
    x = T.tensor3('x')
    target = T.lvector('target')
    # debugging probes; unused in the main path
    print_x = theano.printing.Print('\nx')(x)
    print_target = theano.printing.Print('target')(target)
    mask = T.tensor3('mask')

    print('building model...')

    lstm_1_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/lstm_1.save'
    lstm_2_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/lstm_2.save'
    lstm_3_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/lstm_3.save'
    softmax_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/softmax_1.save'

    lstm_1 = load_model(lstm_1_filepath)
    # lstm_2 = load_model(lstm_2_filepath)
    # lstm_3 = load_model(lstm_3_filepath)
    softmax = load_model(softmax_filepath)

    #lstm_1 = variable_length_sequence_lstm.LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='rec_1', return_indices=[-1], dropout_prob=0.3)
    #lstm_2 = variable_length_sequence_lstm.LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='rec_2', return_indices=None, dropout_prob=0.3)
    #lstm_3 = variable_length_sequence_lstm.LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='rec_3', return_indices=[-1], dropout_prob=0.3)
    #softmax = variable_length_sequence_lstm.Softmax(n_vis=n_input_at_each_timestep, n_classes=n_classes)

    # layers = [lstm_1, lstm_2, lstm_3, softmax]
    layers = [lstm_1, softmax]

    cost_expr = variable_length_sequence_lstm.Softmax.negative_log_likelihood
    rnn = variable_length_sequence_lstm.MLP(layers,
                                            cost=cost_expr,
                                            return_indices=[-1])

    cost, updates = rnn.get_cost_updates(x, target, mask, learning_rate=0.005)

    batch_size = 10

    print('building trainer...')
    trainer = theano.function(
        [index], [cost],
        updates=updates,
        givens={
            x: trainset_X[:, index * batch_size:(index + 1) * batch_size],
            target: trainset_y[index * batch_size:(index + 1) * batch_size],
            mask: trainset_masks[:,
                                 index * batch_size:(index + 1) * batch_size]
        },
        mode='FAST_RUN')

    errors = rnn.layers[-1].errors(target)
    validate_model = theano.function(
        inputs=[index],
        outputs=[cost, errors],
        givens={
            x: testset_X[:, index * batch_size:(index + 1) * batch_size],
            target: testset_y[index * batch_size:(index + 1) * batch_size],
            mask: testset_masks[:, index * batch_size:(index + 1) * batch_size]
        },
        mode='FAST_RUN')

    print('training model...')
    n_train_examples = trainset_X.shape.eval()[1]
    n_test_examples = testset_X.shape.eval()[1]

    n_epochs = 1000
    lowest_cost = -1
    run_validation = False  # validate only after the training cost improves
    n_train_batches = int(n_train_examples / float(batch_size))
    n_validation_batches = int(n_test_examples / float(batch_size))
    for epoch in range(n_epochs):
        costs = []
        #random_indices = get_random_indices(max_index=n_train_examples - 1, samples_per_epoch=100)

        for batch_idx in range(n_train_batches):
            # alternatively, iterate over random_indices
            costs.append(trainer(batch_idx)[0])
        avg_cost = np.mean(costs)
        print('training cost for epoch {0}: {1}'.format(epoch, avg_cost))

        if lowest_cost == -1 or avg_cost < lowest_cost * 0.99:
            lowest_cost = avg_cost
            run_validation = True
            save_model(lstm_1, lstm_1_filepath)
            # save_model(lstm_2, lstm_2_filepath)
            # save_model(lstm_3, lstm_3_filepath)
            save_model(softmax, softmax_filepath)

        error_rates = []
        if run_validation:
            print('\nvalidation')
            for batch_idx in range(n_validation_batches):
                # validate_model returns [cost, errors]; keep the error rate
                error_rates.append(validate_model(batch_idx)[1])
            accuracy = (1 - np.mean(error_rates)) * 100
            print('accuracy for epoch {0}: {1}%'.format(epoch, accuracy))
            run_validation = False

    # print('finished training, final stats:\nfinal cost: {0}\naccuracy: {1}%'.format(np.mean(costs), accuracy))
    print('finished training, final stats:\nfinal cost: {0}'.format(
        np.mean(costs)))

    for layer in rnn.layers:
        for param in layer.params:
            print('{}: {}'.format(param.name, param.get_value()))
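The loop above relies on sign_lang.pad_data_to_max_sample_length returning a mask aligned with the padded samples, and on the subsequent swapaxes calls producing the time-major (time, sample, feature) layout that the givens slices assume. Below is a minimal self-contained sketch of that padding convention; pad_to_max_length is a hypothetical stand-in for the real loader, inferred from how the arrays are used above.

import numpy as np

def pad_to_max_length(samples):
    """Pad variable-length (length, n_features) samples to a common length.

    Returns padded data and a 0/1 mask of the same shape, where 1 marks a
    real timestep and 0 marks padding. Hypothetical stand-in for
    sign_lang.pad_data_to_max_sample_length.
    """
    max_len = max(s.shape[0] for s in samples)
    n_features = samples[0].shape[1]
    X = np.zeros((len(samples), max_len, n_features))
    masks = np.zeros((len(samples), max_len, n_features))
    for i, s in enumerate(samples):
        X[i, :s.shape[0]] = s
        masks[i, :s.shape[0]] = 1.0
    return X, masks

# two toy samples of lengths 2 and 3, with 10 features each
samples = [np.ones((2, 10)), np.ones((3, 10))]
X, masks = pad_to_max_length(samples)
# swap to the time-major layout used above: (time, sample, feature)
X, masks = np.swapaxes(X, 0, 1), np.swapaxes(masks, 0, 1)
print(X.shape, masks.shape)  # (3, 2, 10) (3, 2, 10)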
Example #2
def main_theano_sign_lang():
    """
    :description: Trains a model on the sign language data, truncating every
        sequence to the length of the shortest sample.
    """
    print('loading data...')
    n_input_at_each_timestep = 10
    n_classes = 97  # 97 or 98 classes depending on whether the labels are 0-indexed; adjust if needed
    dataset_sequence_length = 31

    X, y = sign_lang.load_data_from_aggregate_file()
    X = chest_accel.truncate_to_smallest(X)
    split_idx = int(.8 * X.shape[0])

    X = theano.shared(np.asarray(X, dtype=theano.config.floatX), borrow=True)
    y = theano.shared(np.asarray(y, dtype='int64'), borrow=True)  # targets must be int64 to match T.lscalar

    trainset_X, trainset_y = X[:split_idx], y[:split_idx]
    testset_X, testset_y = X[split_idx:], y[split_idx:]

    index = T.lscalar()
    x = T.matrix('x')
    target = T.lscalar('target')
    # debugging probes; unused in the main path
    print_x = theano.printing.Print('\nx')(x)
    print_target = theano.printing.Print('target')(target)

    print('building model...')
    # layers = [EncDecRecurrent(n_vis=n_input_at_each_timestep, n_hid=rec_n_hid, return_indices=[-1]), Softmax(n_vis=rec_n_hid, n_classes=n_classes)]

    # single layer
    #layers = [EncDecRecurrent(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='recurrent', return_indices=[-1]), Softmax(n_vis=n_input_at_each_timestep, n_classes=n_classes)]

    # double layer
    #layers = [EncDecRecurrent(n_vis=n_input_at_each_timestep, n_hid=rec_n_hid, layer_name='rec_1'), EncDecRecurrent(n_vis=rec_n_hid, n_hid=n_input_at_each_timestep, layer_name='rec_2',return_indices=[-1]), Softmax(n_vis=n_input_at_each_timestep, n_classes=n_classes)]

    # lstm
    #layers = [LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='lstm', return_indices=[-1], dropout_prob=0.3), Softmax(n_vis=n_input_at_each_timestep, n_classes=n_classes)]

    # 2*lstm
    #layers = [LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='lstm_1', dropout_prob=0.2),LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='lstm_2', dropout_prob=0.2, return_indices=[-1]), Softmax(n_vis=n_input_at_each_timestep, n_classes=n_classes)]

    encoding_rec_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/enc_dec_overlap_1.save'
    lstm_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/lstm_1.save'
    softmax_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/softmax_1.save'
    encoding_rec = load_model(encoding_rec_filepath)
    # recurrent_1 = load_model(lstm_filepath)
    # softmax = load_model(softmax_filepath)

    # encoding_rec = encoding_recurrent_overlap.EncodingRecurrentOverlap(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='enc_1')

    # print('building pretrainer...')
    # pre_cost, pre_updates = encoding_rec.get_pretraining_cost_updates(x, learning_rate=0.001)

    # pretrainer = theano.function(
    # 	[index],
    # 	[pre_cost],
    # 	updates=pre_updates,
    # 	givens={
    # 		x: trainset_X[index]
    # 	},
    # 	mode='FAST_RUN'
    # )

    # print('pretraining model...')
    # n_epochs = 20
    # n_train_examples = trainset_X.shape.eval()[0]
    # for epoch in range(n_epochs):
    # 	costs = []
    # 	#random_indices = get_random_indices(max_index=n_train_examples - 1, samples_per_epoch=10)
    # 	for sample_idx in range(n_train_examples):
    # 	#for sample_idx in random_indices:
    # 		costs.append(pretrainer(sample_idx)[0])
    # 	print('training cost for epoch {0}: {1}'.format(epoch, np.mean(costs)))

    # for param in encoding_rec.reconstruction_params:
    # 		print('{}: {}'.format(param.name, param.get_value()))

    # save_model(encoding_rec, encoding_rec_filepath)

    recurrent_1 = LSTM(n_vis=n_input_at_each_timestep,
                       n_hid=n_input_at_each_timestep,
                       layer_name='rec_1',
                       return_indices=[-1],
                       dropout_prob=0.3)
    # recurrent_2 = LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='rec_2', return_indices=[-1], dropout_prob=0.2)
    softmax = Softmax(n_vis=n_input_at_each_timestep, n_classes=n_classes)

    # 1*encoding + recurrent
    layers = [encoding_rec, recurrent_1, softmax]
    # layers = [recurrent_1, softmax]

    # 3*lstm
    # layers = [LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='lstm_1'),
    # 		LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='lstm_2'),
    # 		LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='lstm_3', return_indices=[-1]),
    # 		Softmax(n_vis=n_input_at_each_timestep, n_classes=n_classes)]

    # rnn = EncDecRNN(layers, cost=model_cost, return_indices=[-1])
    cost_expr = Softmax.negative_log_likelihood
    rnn = EncDecRNN(layers, cost=cost_expr, return_indices=[-1])

    # cost, updates = rnn.get_cost_updates((x, print_target))
    cost, updates = rnn.get_cost_updates((x, target))

    print('building trainer...')
    trainer = theano.function([index], [cost],
                              updates=updates,
                              givens={
                                  x: trainset_X[index],
                                  target: trainset_y[index]
                              },
                              mode='FAST_RUN')

    errors = rnn.layers[-1].errors(target)
    validate_model = theano.function(inputs=[index],
                                     outputs=[cost, errors],
                                     givens={
                                         x: testset_X[index],
                                         target: testset_y[index]
                                     },
                                     mode='FAST_RUN')

    print('training model...')
    n_train_examples = trainset_X.shape.eval()[0]
    n_test_examples = testset_X.shape.eval()[0]

    n_epochs = 100
    lowest_cost = -1
    run_validation = False  # validate only after the training cost improves
    for epoch in range(n_epochs):
        costs = []
        #random_indices = get_random_indices(max_index=n_train_examples - 1, samples_per_epoch=100)
        for sample_idx in range(n_train_examples):
            # for sample_idx in random_indices:
            costs.append(trainer(sample_idx)[0])
        avg_cost = np.mean(costs)
        print('training cost for epoch {0}: {1}'.format(epoch, avg_cost))

        if lowest_cost == -1 or avg_cost < lowest_cost * 0.98:
            lowest_cost = avg_cost
            run_validation = True
            save_model(recurrent_1, lstm_filepath)
            save_model(softmax, softmax_filepath)

        error_rates = []
        if run_validation:
            print('\nvalidation')
            for sample_idx in range(n_test_examples):
                # validate_model returns [cost, errors]; keep the error rate
                error_rates.append(validate_model(sample_idx)[1])
            accuracy = (1 - np.mean(error_rates)) * 100
            print('accuracy for epoch {0}: {1}%'.format(epoch, accuracy))
            run_validation = False

    # print('finished training, final stats:\nfinal cost: {0}\naccuracy: {1}%'.format(np.mean(costs), accuracy))
    print('finished training, final stats:\nfinal cost: {0}'.format(
        np.mean(costs)))

    for layer in rnn.layers:
        for param in layer.params:
            print('{}: {}'.format(param.name, param.get_value()))
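Both examples call save_model and load_model without defining them. Since whole layer objects are checkpointed and restored, a plain-pickle implementation is the likely shape; the following is a sketch under that assumption, not the original helpers.

import pickle

def save_model(model, filepath):
    # serialize the layer object, Theano shared parameters included
    with open(filepath, 'wb') as f:
        pickle.dump(model, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_model(filepath):
    # restore a previously checkpointed layer
    with open(filepath, 'rb') as f:
        return pickle.load(f)

Theano shared variables pickle their current values, so a restored layer resumes training from the checkpointed weights.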
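Finally, both training loops share the same checkpoint-and-validate trigger: run validation only when the average epoch cost beats the best seen so far by a relative margin (1% in the first example, 2% in the second). The same logic in a reusable form; the class name and structure are illustrative, not from the original.

class ImprovementTrigger(object):
    """Fires when a cost beats the best seen so far by rel_margin."""

    def __init__(self, rel_margin=0.01):
        self.best = None
        self.rel_margin = rel_margin

    def __call__(self, cost):
        # always fire on the first observation, then only on real improvement
        if self.best is None or cost < self.best * (1 - self.rel_margin):
            self.best = cost
            return True
        return False

# usage mirroring the loops above
should_validate = ImprovementTrigger(rel_margin=0.01)
for epoch_cost in [1.0, 0.995, 0.9, 0.89]:
    if should_validate(epoch_cost):
        print('epoch cost {}: validating and checkpointing'.format(epoch_cost))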