from functools import partial

import numpy
import scipy.optimize
import theano
import theano.tensor as T

# Project-local imports: the module path for ICHISeqDataReader is assumed
# here; pretraining_functions_sda_cg is defined elsewhere in this project.
from ichi_seq_data_reader import ICHISeqDataReader


def pretrain_sda_cg(sda, train_names, read_window, read_algo, read_rank,
                    window_size, pretraining_epochs, corruption_levels):
    ## Pre-train layer-wise
    print('... getting the pretraining functions')
    for i in xrange(sda.n_layers):
        train_reader = ICHISeqDataReader(train_names)
        n_train_patients = len(train_names)
        for patients in xrange(n_train_patients):
            train_set_x, train_set_y = train_reader.read_next_doc(
                algo=read_algo,
                window=read_window,
                rank=read_rank)
            pretraining_fn, pretraining_update = pretraining_functions_sda_cg(
                sda=sda,
                train_set_x=train_set_x,
                window_size=window_size,
                corruption_levels=corruption_levels)

            print('... pre-training the model')
            # using scipy conjugate gradient optimizer; the flat vector being
            # optimized packs layer i's weight matrix and hidden bias, hence
            # the (n_visible + 1) * n_hidden initial point
            print('Optimizing using scipy.optimize.fmin_cg...')
            best_w_b = scipy.optimize.fmin_cg(
                f=partial(pretraining_fn, da_index=i),
                x0=numpy.zeros(
                    (sda.dA_layers[i].n_visible + 1) * sda.dA_layers[i].n_hidden,
                    dtype=sda.dA_layers[i].input.dtype),
                fprime=partial(pretraining_update, da_index=i),
                maxiter=pretraining_epochs)
    return sda
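
# A minimal usage sketch, not part of the original source: the patient ids,
# reader settings, and hyper-parameter values below are illustrative
# assumptions, and `sda` stands for an already-built stacked dA whose
# constructor is defined elsewhere in this project.
#
#   sda = pretrain_sda_cg(
#       sda=sda,
#       train_names=['p002', 'p003'],     # hypothetical patient ids
#       read_window=1,
#       read_algo='filter',               # assumed reader algorithm name
#       read_rank=10,
#       window_size=30,
#       pretraining_epochs=15,
#       corruption_levels=[0.1, 0.2])     # one level per dA layer
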
def train_logistic_cg(read_algo, read_window, read_rank, train_names,
                      valid_names, window_size, n_epochs, classifier):
    # read the datasets
    train_reader = ICHISeqDataReader(train_names)
    valid_reader = ICHISeqDataReader(valid_names)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a window start in the dataset

    # generate symbolic variables for input
    x = classifier.x  # data, presented as a window with x, y, z for each sample
    y = T.iscalar('y')  # labels, presented as int label

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    for pat_num in xrange(len(train_names)):
        # go through the training set one patient document at a time
        train_set_x, train_set_y = train_reader.read_next_doc(
            algo=read_algo,
            window=read_window,
            rank=read_rank)
        valid_set_x, valid_set_y = valid_reader.read_next_doc(
            algo=read_algo,
            window=read_window,
            rank=read_rank)
        n_train_samples = train_set_x.get_value(
            borrow=True).shape[0] - window_size + 1
        n_valid_samples = valid_set_x.get_value(
            borrow=True).shape[0] - window_size + 1

        # each sample is a window of observations; the label is taken from
        # the last position of the window
        validate_model = theano.function(
            [index],
            classifier.errors(y),
            givens={
                x: valid_set_x[index:index + window_size],
                y: valid_set_y[index + window_size - 1]
            },
            name="validate")

        # compile a theano function that returns the cost
        conj_cost = theano.function(
            inputs=[index],
            outputs=[cost, classifier.errors(y), classifier.predict(), y],
            givens={
                x: train_set_x[index:index + window_size],
                y: train_set_y[index + window_size - 1]
            },
            name="conj_cost")

        # compile a theano function that returns the gradient with respect
        # to theta
        conj_grad = theano.function(
            [index],
            T.grad(cost, classifier.theta),
            givens={
                x: train_set_x[index:index + window_size],
                y: train_set_y[index + window_size - 1]
            },
            name="conj_grad")

        train_confusion_matrix = numpy.zeros((7, 7))

        # creates a function that computes the average cost on the training set
        def train_fn(theta_value):
            classifier.theta.set_value(theta_value, borrow=True)
            cur_train_cost = []
            cur_train_error = []
            for i in xrange(n_train_samples):
                sample_cost, sample_error, cur_pred, cur_actual = conj_cost(i)
                cur_train_cost.append(sample_cost)
                cur_train_error.append(sample_error)
                train_confusion_matrix[cur_actual][cur_pred] += 1

            this_train_loss = float(numpy.mean(cur_train_cost))
            classifier.train_cost_array.append(
                [classifier.epoch, this_train_loss])
            classifier.train_error_array.append(
                [classifier.epoch, float(numpy.mean(cur_train_error) * 100)])
            classifier.epoch += 1
            return this_train_loss

        # creates a function that computes the average gradient of cost with
        # respect to theta
        def train_fn_grad(theta_value):
            classifier.theta.set_value(theta_value, borrow=True)
            grad = conj_grad(0)
            for i in xrange(1, n_train_samples):
                grad += conj_grad(i)
            return grad / n_train_samples

        # creates the validation function, invoked by fmin_cg after each
        # iteration
        def callback(theta_value):
            classifier.theta.set_value(theta_value, borrow=True)
            # compute the validation loss
            validation_losses = [validate_model(i)
                                 for i in xrange(n_valid_samples)]
            this_validation_loss = float(numpy.mean(validation_losses) * 100.)
            print('validation error %f %%' % this_validation_loss)
            classifier.valid_error_array.append(
                [classifier.epoch, this_validation_loss])

        ###############
        # TRAIN MODEL #
        ###############
        # using scipy conjugate gradient optimizer; classifier.theta is
        # updated as a side effect of train_fn during optimization
        print('Optimizing using scipy.optimize.fmin_cg...')
        best_theta = scipy.optimize.fmin_cg(
            f=train_fn,
            x0=numpy.zeros((classifier.n_in + 1) * classifier.n_out,
                           dtype=x.dtype),
            fprime=train_fn_grad,
            callback=callback,
            disp=0,
            maxiter=n_epochs)
    return classifier
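
# A minimal usage sketch, not part of the original source: all argument
# values are illustrative assumptions, and `classifier` stands for the
# project's logistic-regression object exposing x, theta, n_in, n_out,
# epoch, and the *_array attributes used above.
#
#   classifier = train_logistic_cg(
#       read_algo='filter',               # assumed reader algorithm name
#       read_window=1,
#       read_rank=10,
#       train_names=['p002', 'p003'],     # hypothetical patient ids
#       valid_names=['p005'],
#       window_size=30,
#       n_epochs=50,
#       classifier=classifier)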