import os
import csv
import time
import cPickle

import numpy as np
import theano.tensor as T
from theano import function, shared, config, Param

# SparseAutoencoder and OutputLayer are assumed to be defined elsewhere in this
# project; they expose get_hidden_act/get_params/get_cost_and_updates and
# get_cost/get_error/get_output respectively, as used below.


class StackedAutoencoder(object):

    def __init__(self, in_size=8, hidden_size=[500, 500, 250], out_size=10,
                 batch_size=10, corruption_levels=[0.1, 0.1, 0.1],
                 dropout=True, drop_rates=[0.5, 0.2, 0.2, 0.2]):
        self.i_size = in_size
        self.h_sizes = hidden_size
        self.o_size = out_size
        self.batch_size = batch_size
        self.n_layers = len(hidden_size)

        self.sa_layers = []
        self.sa_activations_train = []
        self.sa_activations_test = []
        self.thetas = []
        self.thetas_as_blocks = []

        self.dropout = dropout
        self.drop_rates = drop_rates
        # check that there are layer_count+1 dropout rates (the extra one is
        # for the softmax layer); note the default drop_rates above has been
        # given n_layers+1 entries so that default construction passes this check
        if dropout:
            assert self.n_layers + 1 == len(self.drop_rates)

        self.corruption_levels = corruption_levels
        # check that there are layer_count corruption levels
        # (the original guarded this with an undefined `denoising` name, a
        # NameError; the check is cheap, so run it unconditionally)
        assert self.n_layers == len(self.corruption_levels)

        self.cost_fn_names = ['sqr_err', 'neg_log']

        self.x = T.matrix('x')  # the inputs
        self.y = T.matrix('y')  # the labels for the corresponding inputs
        self.fine_cost = T.dscalar('fine_cost')  # fine-tuning cost
        self.error = T.dscalar('test_error')  # test error value

        # print network info
        print "Network Info:"
        print "Layers: %i" % self.n_layers
        print "Layer sizes: ",
        print self.h_sizes
        print ""

        print "Building the model..."

        # Initializing the network:
        # create SparseAutoencoders and store them in sa_layers, and build the
        # symbolic hidden activations in sa_activations_train/test.
        # Two sets of activations are kept because, with dropout, the
        # computation differs between training and test time.
        for i in xrange(self.n_layers):
            if i == 0:
                curr_input_size = self.i_size
            else:
                curr_input_size = self.h_sizes[i - 1]

            if i == 0:
                # the first layer consumes the raw input
                curr_input_train = self.x
                curr_input_test = self.x
            else:
                # subsequent layers consume the previous layer's hidden activation
                a2_train = self.sa_layers[-1].get_hidden_act(training=True)
                a2_test = self.sa_layers[-1].get_hidden_act(training=False)
                self.sa_activations_train.append(a2_train)
                self.sa_activations_test.append(a2_test)
                curr_input_train = self.sa_activations_train[-1]
                curr_input_test = self.sa_activations_test[-1]

            sa = SparseAutoencoder(n_inputs=curr_input_size, n_hidden=self.h_sizes[i],
                                   x_train=curr_input_train, x_test=curr_input_test,
                                   dropout=dropout, dropout_rate=self.drop_rates[i])

            self.sa_layers.append(sa)
            self.thetas.extend(self.sa_layers[-1].get_params())
            self.thetas_as_blocks.append(self.sa_layers[-1].get_params())

        # index -1 gives the last (topmost) layer
        a2_train = self.sa_layers[-1].get_hidden_act(training=True)
        a2_test = self.sa_layers[-1].get_hidden_act(training=False)
        self.sa_activations_train.append(a2_train)
        self.sa_activations_test.append(a2_test)

        self.outLayer = OutputLayer(n_inputs=self.h_sizes[-1], n_outputs=self.o_size,
                                    x_train=self.sa_activations_train[-1],
                                    x_test=self.sa_activations_test[-1],
                                    y=self.y, dropout=self.dropout,
                                    dropout_rate=self.drop_rates[-1])

        self.lam_fine_tune = T.scalar('lam')
        self.fine_cost = self.outLayer.get_cost(self.lam_fine_tune,
                                                cost_fn=self.cost_fn_names[1])

        self.thetas.extend(self.outLayer.theta)

        # measure test performance
        self.error = self.outLayer.get_error(self.y)
        self.predict = self.outLayer.get_output()

    def load_max_pat(self, file_path):
        with open(file_path, 'rb') as f:
            max_patients = cPickle.load(f)
        return max_patients

    def load_pred_ins(self, file_path):
        with open(file_path, 'rb') as f:
            pred_ins = cPickle.load(f)
        return shared(value=np.asarray(pred_ins, dtype=config.floatX), borrow=True)

    def load_cancer_data(self, file_path):
        with open(file_path, 'rb') as f:
            cancer_data = cPickle.load(f)
        return cancer_data
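    # A minimal construction sketch (illustrative; the values are assumptions,
    # not from the original source). With dropout enabled, drop_rates needs
    # n_layers + 1 entries (one per hidden layer plus one for the softmax
    # output layer), while corruption_levels needs exactly one entry per
    # hidden layer:
    #
    #     net = StackedAutoencoder(in_size=8, hidden_size=[500, 500, 250],
    #                              out_size=10, batch_size=10,
    #                              corruption_levels=[0.1, 0.1, 0.1],
    #                              dropout=True,
    #                              drop_rates=[0.5, 0.2, 0.2, 0.2])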
    def load_data(self, file_path='data.pkl', make_predict=True):
        with open(file_path, 'rb') as f:
            if not make_predict:
                all_ins, all_outs, all_v_in, all_v_out, all_t_in, all_t_out = cPickle.load(f)
                train_set = [all_ins, all_outs]
                valid_set = [all_v_in, all_v_out]
                test_set = [all_t_in, all_t_out]
            else:
                # no held-out test split in this mode; reuse the validation split
                all_ins, all_outs, all_v_in, all_v_out = cPickle.load(f)
                train_set = [all_ins, all_outs]
                valid_set = [all_v_in, all_v_out]
                test_set = [all_v_in, all_v_out]

        def get_shared_data(data_xy):
            data_x, data_y = data_xy
            shared_x = shared(value=np.asarray(data_x, dtype=config.floatX), borrow=True)
            shared_y = shared(value=np.asarray(data_y, dtype=config.floatX), borrow=True)
            return shared_x, shared_y

        train_x, train_y = get_shared_data(train_set)
        valid_x, valid_y = get_shared_data(valid_set)
        test_x, test_y = get_shared_data(test_set)

        all_data = [(train_x, train_y), (valid_x, valid_y), (test_x, test_y)]
        return all_data

    def greedy_pre_training(self, train_x, batch_size=1, pre_lr=0.25, denoising=False):
        pre_train_fns = []

        index = T.lscalar('index')
        lam = T.scalar('lam')
        beta = T.scalar('beta')
        rho = T.scalar('rho')

        print "\nCompiling functions for DA layers..."
        for i, sa in enumerate(self.sa_layers):
            cost, updates = sa.get_cost_and_updates(l_rate=pre_lr, lam=lam, beta=beta,
                                                    rho=rho,
                                                    cost_fn=self.cost_fn_names[1],
                                                    corruption_level=self.corruption_levels[i],
                                                    denoising=denoising)

            # The `givens` below replace self.x (the input feeding the first
            # layer's curr_input) with a mini-batch rather than the full data
            # set. Subsequent layers' inputs need no such substitution: they
            # are hidden activations computed from self.x, so once self.x is a
            # mini-batch, every layer above it sees the activations for that
            # same mini-batch.
            sa_fn = function(inputs=[index,
                                     Param(lam, default=0.25),
                                     Param(beta, default=0.25),
                                     Param(rho, default=0.2)],
                             outputs=cost, updates=updates,
                             givens={
                                 self.x: train_x[index * batch_size:(index + 1) * batch_size]
                             })

            pre_train_fns.append(sa_fn)

        return pre_train_fns

    def fine_tuning(self, datasets, batch_size=1, fine_lr=0.2):
        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # one plain gradient-descent update per parameter
        gparams = T.grad(self.fine_cost, self.thetas)
        updates = [(param, param - gparam * fine_lr)
                   for param, gparam in zip(self.thetas, gparams)]

        fine_tune_fn = function(inputs=[index, Param(self.lam_fine_tune, default=0.25)],
                                outputs=self.fine_cost, updates=updates,
                                givens={
                                    self.x: train_set_x[index * self.batch_size:(index + 1) * self.batch_size],
                                    self.y: train_set_y[index * self.batch_size:(index + 1) * self.batch_size]
                                })

        validation_fn = function(inputs=[index], outputs=self.error,
                                 givens={
                                     self.x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                                     self.y: valid_set_y[index * batch_size:(index + 1) * batch_size]
                                 }, name='valid')

        def valid_score():
            return [validation_fn(i) for i in xrange(n_valid_batches)]

        return fine_tune_fn, valid_score
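    # A minimal standalone sketch (illustrative, not part of the original
    # code) of the `givens` mini-batch mechanism used in greedy_pre_training
    # and fine_tuning above: the compiled function never sees the full shared
    # matrix, only the slice selected by `index`.
    #
    #     import numpy as np
    #     import theano.tensor as T
    #     from theano import function, shared, config
    #
    #     data = shared(np.arange(20, dtype=config.floatX).reshape(10, 2),
    #                   borrow=True)
    #     x = T.matrix('x')
    #     index = T.lscalar('index')
    #     batch = 2
    #     row_sum = function([index], x.sum(axis=1),
    #                        givens={x: data[index * batch:(index + 1) * batch]})
    #     print row_sum(0)   # rows 0..1 -> [ 1.  5.]
    #     print row_sum(3)   # rows 6..7 -> [ 25.  29.]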
print "Batch size: ", print batch_size print "Pre-training: %f (lr) %i (epochs)" % (pre_lr, pre_epochs) print "Fine-tuning: %f (lr) %i (epochs)" % (fine_lr, fine_epochs) print "Corruption: ", print denoising, print self.corruption_levels print "Weight decay: ", print lam print "Dropout: ", print self.dropout, print self.drop_rates print "Sparcity: ", print "%f (beta) %f (rho)" % (beta, rho) (train_set_x, train_set_y) = datasets[0] (valid_set_x, valid_set_y) = datasets[1] (test_set_x, test_set_y) = datasets[2] n_train_batches = train_set_x.get_value( borrow=True).shape[0] / batch_size pre_train_fns = self.greedy_pre_training(train_set_x, batch_size=self.batch_size, pre_lr=pre_lr, denoising=denoising) start_time = time.clock() for i in xrange(self.n_layers): print "\nPretraining layer %i" % i for epoch in xrange(pre_epochs): c = [] for batch_index in xrange(n_train_batches): c.append(pre_train_fns[i](index=batch_index, lam=lam, beta=beta, rho=rho)) print 'Training epoch %d, cost ' % epoch, print np.mean(c) end_time = time.clock() training_time = (end_time - start_time) print "Training time: %f" % training_time ######################################################################### ##### Fine Tuning ##### ######################################################################### print "\nFine tuning..." fine_tune_fn, valid_model = self.fine_tuning( datasets, batch_size=self.batch_size, fine_lr=fine_lr) ######################################################################### ##### Early-Stopping ##### ######################################################################### patience = 10 * n_train_batches # look at this many examples patience_increase = 2. improvement_threshold = 1.005 #validation frequency - the number of minibatches to go through before checking validation set validation_freq = min(n_train_batches, patience / 2) #we want to minimize best_valid_loss, so we shoudl start with largest best_valid_loss = np.inf test_score = 0. done_looping = False epoch = 0 while epoch < fine_epochs and (not done_looping): epoch = epoch + 1 fine_tune_cost = [] for mini_index in xrange(n_train_batches): cost = fine_tune_fn(index=mini_index, lam=lam) fine_tune_cost.append(cost) #what's the role of iter? iter acts as follows #in first epoch, iter for minibatch 'x' is x #in second epoch, iter for minibatch 'x' is n_train_batches + x #iter is the number of minibatches processed so far... iter = (epoch - 1) * n_train_batches + mini_index # this is an operation done in cycles. 1 cycle is iter+1/validation_freq # doing this every epoch if (iter + 1) % validation_freq == 0: validation_losses = valid_model() curr_valid_loss = np.mean(validation_losses) print 'epoch %i, minibatch %i/%i, validation error is %f %%' % ( epoch, mini_index + 1, n_train_batches, curr_valid_loss * 100) if curr_valid_loss < best_valid_loss: if (curr_valid_loss < best_valid_loss * improvement_threshold): patience = max(patience, iter * patience_increase) best_valid_loss = curr_valid_loss best_iter = iter print 'Fine tune cost for epoch %i, is %f' % ( epoch + 1, np.mean(fine_tune_cost)) #patience is here to check the maximum number of iterations it should check #before terminating if patience <= iter: done_looping = True break def get_correct_max_pat(self, x, max_pat): for p in max_pat: if p[0] == x[0] and p[1] == x[1] and p[2] == x[2]: return p[3] return -1 def test_model(self, test_set_x, test_set_y, batch_size=1, max_pat=None, cancers=None): print '\nTesting the model...' 
    def test_model(self, test_set_x, test_set_y, batch_size=1, max_pat=None,
                   cancers=None):

        print '\nTesting the model...'
        n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

        index = T.lscalar('index')

        # no update parameters, so this just returns the computed values
        # without any objective-function minimization
        test_fn = function(inputs=[index],
                           outputs=[self.error, self.y, self.predict, self.x],
                           givens={
                               self.x: test_set_x[index * batch_size:(index + 1) * batch_size],
                               self.y: test_set_y[index * batch_size:(index + 1) * batch_size]
                           }, name='test')

        e = []
        errsAll = dict()
        for batch_index in xrange(n_test_batches):
            err, act, pred, x = test_fn(batch_index)
            e.append(err)

            # per-example, per-year relative error |pred - actual| / actual,
            # grouped by the (cancer, gender, status) key of the input row
            for p, a, x_i in zip(pred, act, x):
                errsSingle = []
                for p2, a2 in zip(p, a):
                    diff = abs(p2 - a2)
                    if a2 > 0.0:
                        errTmp = diff / a2
                    else:
                        errTmp = 0.0
                    errsSingle.append(errTmp)

                key = self.get_key(x_i, cancers)
                if key not in errsAll:
                    errsAll[key] = [errsSingle]
                else:
                    errsAll[key].append(errsSingle)

            # print a few sample rows (predictions, then actuals), rescaled
            # back to patient counts
            for idx in [2, 5, 8]:
                max_pat_val = self.get_correct_max_pat(x[idx], max_pat)
                for p in pred[idx]:
                    print int(p * max_pat_val),
                print ""
            for idx in [2, 5, 8]:
                max_pat_val = self.get_correct_max_pat(x[idx], max_pat)
                for a in act[idx]:
                    print int(a * max_pat_val),
                print ""

        keys = []
        errors = []
        for k in errsAll:
            keys.append(k)
            errsForKey = np.mean(np.asarray(errsAll[k]) * 100, axis=0)
            errors.append(errsForKey)
            print 'Test Error for ', k, ": ", errsForKey

        self.create_csv_errors(keys, errors)

    def get_key(self, x, cancers):
        c_idx = int(round(x[0] * len(cancers)))
        s = cancers[c_idx]
        if x[1] == 1.0:
            s = s + ",Male"
        else:
            s = s + ",Female"
        if x[2] == 1.0:
            s = s + ",Mortality"
        else:
            s = s + ",Incidence"
        return s
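    # An illustrative trace of get_key (the cancer names below are assumed,
    # not from the source): the first feature is the cancer index normalized
    # by the number of cancers, and features 1 and 2 are gender/status flags.
    #
    #     cancers = ['Breast', 'Lung', 'Prostate', 'Colon']  # hypothetical
    #     x = [0.5, 1.0, 0.0]          # only the first 3 features are used
    #     # c_idx = int(round(0.5 * 4)) = 2  ->  'Prostate'
    #     # x[1] == 1.0 -> ',Male'; x[2] == 0.0 -> ',Incidence'
    #     # key = 'Prostate,Male,Incidence'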
    def predict_val(self, pred_ins):

        print 'Predicting ....'
        # no update parameters, so this just returns the computed values
        # without any objective-function minimization
        pred_fn = function(inputs=[], outputs=[self.predict],
                           givens={self.x: pred_ins}, name='predict')

        pred = pred_fn()
        return pred

    def create_csv_errors(self, keys, errors):
        all_strings = []
        header = ['Cancer', 'Gender', 'Status',
                  '2011', '2012', '2013', '2014', '2015',
                  '2016', '2017', '2018', '2019', '2020']
        all_strings.append(header)

        for (k, e) in zip(keys, errors):
            single_str = []
            single_str.extend(k.split(","))
            for val in e:
                single_str.append(str(val))
            all_strings.append(single_str)

        with open('errors.csv', 'wb') as f:
            writer = csv.writer(f)
            writer.writerows(all_strings)

    def create_csv(self, x, pred, cancers, max_patients, num_in_years):
        x_arr = x.get_value()

        all_strings = []
        header = ['Cancer', 'Gender', 'Status']
        # input years followed by predicted years
        for yr in xrange(2006, 2011):
            header.append(str(yr))
        for yr in xrange(2011, 2021):
            header.append(str(yr))
        all_strings.append(header)

        for i in xrange(len(x_arr)):
            single_str = []
            inp = x_arr[i]

            c_idx = int(round(inp[0] * len(cancers)))
            single_str.append(cancers[c_idx])

            if inp[1] == 0.0:
                single_str.append('Female')
            elif inp[1] == 1.0:
                single_str.append('Male')

            if inp[2] == 0.0:
                single_str.append('Incidence')
            elif inp[2] == 1.0:
                single_str.append('Mortality')

            max_pat_val = self.get_correct_max_pat(inp, max_patients)

            # de-normalize the input years back to patient counts
            for k in xrange(num_in_years):
                single_str.append(str(int(round(inp[k + 3] * max_pat_val))))

            # de-normalize the predicted years
            for j in xrange(len(pred[0][i])):
                p = pred[0][i][j]
                single_str.append(str(int(round(p * max_pat_val))))

            all_strings.append(single_str)

        with open('results.csv', 'wb') as f:
            writer = csv.writer(f)
            writer.writerows(all_strings)

    def mkdir_if_not_exist(self, name):
        if not os.path.exists(name):
            os.makedirs(name)
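# A hedged end-to-end driver sketch. The file names, hyperparameters, and the
# assumption that load_cancer_data returns the list of cancer names are
# illustrative guesses, not from the original source:
#
#     if __name__ == '__main__':
#         net = StackedAutoencoder(in_size=8, hidden_size=[500, 500, 250],
#                                  out_size=10, batch_size=10,
#                                  drop_rates=[0.5, 0.2, 0.2, 0.2])
#         datasets = net.load_data('data.pkl', make_predict=True)
#         net.train_model(datasets=datasets, pre_epochs=5, fine_epochs=300,
#                         batch_size=net.batch_size, denoising=True)
#         test_x, test_y = datasets[2]
#         max_pat = net.load_max_pat('max_patients.pkl')   # hypothetical file
#         cancers = net.load_cancer_data('cancers.pkl')    # hypothetical file
#         net.test_model(test_x, test_y, batch_size=net.batch_size,
#                        max_pat=max_pat, cancers=cancers)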