def learnAndPredict(Ti, C, TOList):
    rng = np.random.RandomState(SEED)
    learning_rate = learning_rate0
    print np.mean(Ti[1000, :])
    # min-max scale, then standardise, the first 1000 rows of Ti
    aminW = np.amin(Ti[:1000, :])
    amaxW = np.amax(Ti[:1000, :])
    Ti[:1000, :] = (Ti[:1000, :] - aminW) / (amaxW - aminW)
    astdW = np.std(Ti[:1000, :])
    ameanW = np.mean(Ti[:1000, :])
    Ti[:1000, :] = (Ti[:1000, :] - ameanW) / astdW
    # row 1000 is scaled with its own statistics
    aminacW = np.amin(Ti[1000, :])
    amaxacW = np.amax(Ti[1000, :])
    print aminW, amaxW, aminacW, amaxacW
    Ti[1000, :] = (Ti[1000, :] - aminacW) / (amaxacW - aminacW)
    astdacW = np.std(Ti[1000, :])
    ameanacW = np.mean(Ti[1000, :])
    Ti[1000, :] = (Ti[1000, :] - ameanacW) / astdacW

    # apply the same (training-set) statistics to every prediction set
    ile__ = len(TOList)
    ileList = np.zeros(ile__)
    for titer in range(len(TOList)):
        print np.mean(TOList[titer][1000, :])
        TOList[titer][:1000, :] = (TOList[titer][:1000, :] - aminW) / (amaxW - aminW)
        TOList[titer][:1000, :] = (TOList[titer][:1000, :] - ameanW) / astdW
        TOList[titer][1000, :] = (TOList[titer][1000, :] - aminacW) / (amaxacW - aminacW)
        TOList[titer][1000, :] = (TOList[titer][1000, :] - ameanacW) / astdacW
        _, ileList[titer] = TOList[titer].shape

    _, ile = Ti.shape
    N = NN
    data = []
    yyy = []
    need = 1
    BYL = {}
    j = 0
    dwa = 0
    ONES = []
    ZEROS = []
    # collect pairs (i, j) with C[i][j] == 1 (ONES) and all other pairs (ZEROS)
    for i in range(NN):
        for j in range(NN):
            if i != j:
                if C[i][j] == 1:
                    ONES.append((i, j))
                else:
                    ZEROS.append((i, j))
    Nones = len(ONES)
    rng.shuffle(ONES)
    Nzeros = len(ZEROS)
    print Nones
    print Nzeros
    Needed = NUM_TRAIN / 2
    onesPerPair = Needed / Nones + 1
    onesIter = 0
    jj = 0
    # build a balanced training set by alternating positive and negative pairs
    while jj < NUM_TRAIN:
        if jj % 300000 == 0:
            print jj / 300000,
        need = 1 - need
        if need == 1:
            pairNo = onesIter % Nones
            ppp = onesIter / Nones
            s, t = ONES[pairNo]
            shift = rng.randint(0, ile - L)
            onesIter += 1
        if need == 0:
            zer = rng.randint(Nzeros)
            s, t = ZEROS[zer]
            del ZEROS[zer]
            Nzeros -= 1
            shift = rng.randint(0, ile - L)
        # one sample: windows of series s, series t and row 1000, stacked side by side
        x = np.hstack((Ti[s][shift:shift + L],
                       Ti[t][shift:shift + L],
                       Ti[1000][shift:shift + L]))
        y = C[s][t]
        data.append(x)
        yyy.append(y)
        jj += 1

    data = np.array(data, dtype=theano.config.floatX)
    # 96% train, 2% test, 2% validation
    is_train = np.array(([0] * 96 + [1, 1, 2, 2]) * (NUM_TRAIN / 100))
    yyy = np.array(yyy)
    train_set_x0, train_set_y0 = np.array(data[is_train == 0]), yyy[is_train == 0]
    test_set_x, test_set_y = np.array(data[is_train == 1]), yyy[is_train == 1]
    valid_set_x, valid_set_y = np.array(data[is_train == 2]), yyy[is_train == 2]
    n_train_batches = len(train_set_y0) / batch_size
    n_valid_batches = len(valid_set_y) / batch_size
    n_test_batches = len(test_set_y) / batch_size

    epoch = T.scalar()
    index = T.lscalar()
    x = T.matrix('x')
    inone2 = T.matrix('inone2')
    y = T.ivector('y')
    print '... building the model'

    # -------- my layers -------------------
    # ---------------------
    layer0_input = x.reshape((batch_size, 1, 3, L))
    Cx = 5
    layer0 = ConvolutionalLayer(rng, input=layer0_input,
                                image_shape=(batch_size, 1, 3, L),
                                filter_shape=(nkerns[0], 1, 2, Cx),
                                poolsize=(1, 1), fac=0)
    ONE = (3 - 2 + 1) / 1
    L2 = (L - Cx + 1) / 1
    # ---------------------
    Cx2 = 5
    layer1 = ConvolutionalLayer(rng, input=layer0.output,
                                image_shape=(batch_size, nkerns[0], ONE, L2),
                                filter_shape=(nkerns[1], nkerns[0], 2, Cx2),
                                poolsize=(1, 1), activation=ReLU, fac=0)
    ONE = (ONE - 2 + 1) / 1
    L3 = (L2 - Cx2 + 1) / 1
    # ---------------------
    Cx3 = 1
    layer1b = ConvolutionalLayer(rng, input=layer1.output,
                                 image_shape=(batch_size, nkerns[1], ONE, L3),
                                 filter_shape=(nkerns[2], nkerns[1], 1, Cx3),
                                 poolsize=(1, POOL), activation=ReLU, fac=0)
    ONE = (ONE - 1 + 1) / 1
    L4 = (L3 - Cx3 + 1) / POOL
    REGx = 100
    # ---------------------
    layer2_input = layer1b.output.flatten(2)
    print layer2_input.shape
    use_b = False
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[2] * L4,
                         n_out=REGx, activation=T.tanh, use_bias=use_b)
    layer3 = LogisticRegression(input=layer2.output, n_in=REGx, n_out=2)
    cost = layer3.negative_log_likelihood(y)

    out_x2 = theano.shared(np.asarray(np.zeros((N, L)), dtype=theano.config.floatX))
    inone2 = theano.shared(np.asarray(np.zeros((1, L)), dtype=theano.config.floatX))
    inone3 = theano.shared(np.asarray(np.zeros((1, L)), dtype=theano.config.floatX))
    inone4 = theano.shared(np.asarray(np.zeros((1, L)), dtype=theano.config.floatX))
    test_set_x = theano.shared(np.asarray(test_set_x, dtype=theano.config.floatX))
    train_set_x = theano.shared(np.asarray(train_set_x0, dtype=theano.config.floatX))
    train_set_y = T.cast(theano.shared(np.asarray(train_set_y0, dtype=theano.config.floatX)), 'int32')
    test_set_y = T.cast(theano.shared(np.asarray(test_set_y, dtype=theano.config.floatX)), 'int32')
    valid_set_y = T.cast(theano.shared(np.asarray(valid_set_y, dtype=theano.config.floatX)), 'int32')
    valid_set_x = theano.shared(np.asarray(valid_set_x, dtype=theano.config.floatX))

    test_model = theano.function(
        [index], layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]})
    validate_model = theano.function(
        [index], layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    mom_start = 0.5
    mom_end = 0.98
    mom_epoch_interval = n_epochs * 1.0
    #### @@@@@@@@@@@
    class_params0 = [layer3, layer2, layer1, layer1b, layer0]
    class_params = [param for layer in class_params0 for param in layer.params]
    gparams = []
    for param in class_params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)
    gparams_mom = []
    for param in class_params:
        gparam_mom = theano.shared(np.zeros(param.get_value(borrow=True).shape,
                                            dtype=theano.config.floatX))
        gparams_mom.append(gparam_mom)
    # momentum ramps linearly from mom_start to mom_end over the epochs
    mom = ifelse(epoch < mom_epoch_interval,
                 mom_start * (1.0 - epoch / mom_epoch_interval) + mom_end * (epoch / mom_epoch_interval),
                 mom_end)
    updates = OrderedDict()
    for gparam_mom, gparam in zip(gparams_mom, gparams):
        updates[gparam_mom] = mom * gparam_mom - (1. - mom) * learning_rate * gparam
    for param, gparam_mom in zip(class_params, gparams_mom):
        stepped_param = param + updates[gparam_mom]
        squared_filter_length_limit = 15.0
        if param.get_value(borrow=True).ndim == 2:
            # max-norm constraint on the columns of 2D weight matrices
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0, T.sqrt(squared_filter_length_limit))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param

    output = cost
    train_model = theano.function(
        inputs=[epoch, index], outputs=output, updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]})
    keep = theano.function(
        [index], layer3.errorsFull(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]},
        on_unused_input='warn')

    timer = time.clock()
    print "finished reading", (timer - start_time0) / 60., "minutes "

    # TRAIN MODEL #
    print '... training'
    validation_frequency = n_train_batches
    best_params = None
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    epochc = 0
    while (epochc < n_epochs):
        epochc = epochc + 1
        # learning rate decays linearly with the epoch number
        learning_rate = learning_rate0 * (1.2 - ((1.0 * epochc) / n_epochs))
        for minibatch_index in xrange(n_train_batches):
            iter = (epochc - 1) * n_train_batches + minibatch_index
            cost_ij = train_model(epochc, minibatch_index)
            if (iter + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print(' %i) err %.2f ' % (epochc, this_validation_loss / 10)), L, nkerns, REGx, "|", Cx, Cx2, Cx3, batch_size
                if this_validation_loss < best_validation_loss or epochc % 30 == 0:
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') % (epochc, minibatch_index + 1,
                                             n_train_batches, test_score / 10))
    ############
    timel = time.clock()
    print "finished learning", (timel - timer) / 60., "minutes "

    ppm = theano.function(
        [index], layer3.pred_proba_mine(),
        givens={
            x: T.horizontal_stack(T.tile(inone2, (batch_size, 1)),
                                  out_x2[index * batch_size: (index + 1) * batch_size],
                                  T.tile(inone3, (batch_size, 1))),
            y: train_set_y[0 * (batch_size): (0 + 1) * (batch_size)]},
        on_unused_input='warn')

    NONZERO = (N * N - N)
    gc.collect()
    RESList = [np.zeros((N, N)) for it in range(ile__)]
    for __net in range(ile__):
        TO = TOList[__net]
        ileO = ileList[__net]
        RES = RESList[__net]
        # note: shift starts as a float here; the slicing below relies on
        # legacy numpy behaviour of truncating float slice indices
        shift = 0.1
        DELTAshift = (ileO - L) / (Q - 1)
        print "DELTAshift:", DELTAshift
        for q in range(Q):
            dataO = []
            print (q + 1), "/", Q, " ",
            out_x2.set_value(np.asarray(np.array(TO[:, shift:shift + L]),
                                        dtype=theano.config.floatX))
            PARTIAL = np.zeros((N, N))
            inone3.set_value(np.asarray(np.array(TO[1000][shift:shift + L]).reshape(1, L),
                                        dtype=theano.config.floatX))
            for i in range(N):
                inone2.set_value(np.asarray(np.array(TO[i][shift:shift + L]).reshape(1, L),
                                            dtype=theano.config.floatX))
                p = [ppm(ii) for ii in xrange(N / batch_size)]
                for pos in range(N):
                    if pos != i:
                        PARTIAL[i][pos] += p[pos / batch_size][pos % batch_size][1]
            for i in range(N):
                for j in range(N):
                    RES[i][j] += PARTIAL[i][j]
            shift += DELTAshift
        print "Finished", __net
        RESList[__net] = RES / np.max(RES)
        gc.collect()

    end_time = time.clock()
    print "finished predicting", (end_time - timel) / 60., "minutes ", str(nkerns), "using SEED = ", SEED
    print('The code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time0) / 60.))
    return RESList
def train_logisticRegression(learning_rate=0.13, n_epochs=1000,
                             dataset="mnist.pkl.gz", batch_size=600):
    ###############################################################
    # Get Data
    ###############################################################
    # Load datasets
    datasets = shared_dataset(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Visualize some data samples
    plot_image(train_set_x.get_value(borrow=True)[10], 28, 28)
    plot_image(valid_set_x.get_value(borrow=True)[15], 28, 28)
    plot_image(test_set_x.get_value(borrow=True)[5], 28, 28)

    # Split sets into batches
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ###############################################################
    # Build model
    ###############################################################
    # Allocate symbolic variables
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    # Build classifier
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # Define gradient descent
    cost = classifier.negative_log_likelihood(y)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)
    updates = [(classifier.W, classifier.W - g_W * learning_rate),
               (classifier.b, classifier.b - g_b * learning_rate)]

    # Test function
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errorRate(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Validation function
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errorRate(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Training function
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############################################################
    # Train Model
    ###############################################################
    print("Training the model...")
    patience = 5000                # look at this many batches regardless
    patience_increase = 2          # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for batch_index in range(n_train_batches):
            batch_avg_cost = train_model(batch_index)
            iter = (epoch - 1) * n_train_batches + batch_index
            if (iter + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, batch %i/%i, validation error rate %f %%' %
                      (epoch, batch_index + 1, n_train_batches,
                       this_validation_loss * 100))
                if this_validation_loss < best_validation_loss:
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print('    epoch %i, batch %i/%i, test error rate %f %%' %
                          (epoch, batch_index + 1, n_train_batches,
                           test_score * 100))
                    # save the best model found so far
                    with open('best_model.pkl', 'wb') as f:
                        _pickle.dump(classifier, f)
            if (patience <= iter):
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization completed with best validation loss of %f %%, '
           'with test score of %f %%.') %
          (best_validation_loss * 100., test_score * 100.))
    print('The code ran for %d epochs, within %f seconds.' %
          (epoch, end_time - start_time))
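# A minimal sketch of how the classifier pickled above could be reloaded for
# prediction.  It assumes the LogisticRegression instance keeps its symbolic
# `input` variable and a `y_pred` output, as in the standard Theano tutorial
# classifier; if the local class uses different attribute names, adapt them.
import _pickle
import theano

def predict_with_saved_model(data, model_path='best_model.pkl'):
    """Return predicted labels for `data`, a 2D numpy array of flattened images."""
    with open(model_path, 'rb') as f:
        classifier = _pickle.load(f)
    # compile a function mapping raw inputs to the predicted class
    predict_fn = theano.function(inputs=[classifier.input],
                                 outputs=classifier.y_pred)
    return predict_fn(data)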
class SdA(object):
    def __init__(self, numpy_rng, n_ins, n_outs, hidden_layers_sizes,
                 corruption_levels=[0.1, 0.1], theano_rng=None):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers sizes, must contain
                                    at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each layer
        """
        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        self.n_ins = n_ins
        self.n_outs = n_outs

        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.append(sigmoid_layer.theta)

            # Construct a denoising autoencoder that shares weights with this layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          theta=sigmoid_layer.theta)
            self.dA_layers.append(dA_layer)

        sda_input = T.matrix('sda_input')
        self.da_layers_output_size = hidden_layers_sizes[-1]
        self.get_da_output = theano.function(
            inputs=[sda_input],
            outputs=self.sigmoid_layers[-1].output.reshape(
                (-1, self.da_layers_output_size)),
            givens={self.x: sda_input})

        self.logLayer = LogisticRegression(
            rng=numpy.random.RandomState(),
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        # self.params.extend(self.logLayer.params)
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)
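# A minimal usage sketch for the SdA class above, assuming the HiddenLayer,
# dA and LogisticRegression classes it relies on are defined as in the
# surrounding code.  The layer sizes and the random input batch are
# illustrative assumptions only.
import numpy
import theano

def build_example_sda():
    sda = SdA(numpy_rng=numpy.random.RandomState(89677),
              n_ins=28 * 28,
              n_outs=10,
              hidden_layers_sizes=[1000, 500],
              corruption_levels=[0.1, 0.2])
    # get_da_output maps a batch of inputs to the activations of the top
    # sigmoid layer; here the result has shape (5, 500)
    batch = numpy.random.rand(5, 28 * 28).astype(theano.config.floatX)
    encoded = sda.get_da_output(batch)
    return sda, encoded.shape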
def evaluate_lenet5(datasets, learning_rate=0.1, n_epochs=10,
                    nkerns=[20, 50], batch_size=2):
    """ Demonstrates a LeNet-style convolutional network (adapted from the
    MNIST LeNet tutorial) on 1x47 input vectors.

    :param datasets: train/validation/test sets, each an (x, y) pair of shared variables
    :param batch_size: size of a minibatch

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized rows
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape the matrix of inputs of shape (batch_size, 47) to a 4D tensor
    # of shape (batch_size, 1, 1, 47), compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 1, 47))

    # Construct the first convolutional pooling layer:
    # filtering reduces the input width to (47 - 6 + 1) = 42
    # maxpooling reduces this further to 42 / 2 = 21
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 1, 21)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 1, 47),
                                filter_shape=(nkerns[0], 1, 1, 6),
                                poolsize=(1, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the width to (21 - 6 + 1) = 16
    # maxpooling reduces this further to 16 / 2 = 8
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 1, 8)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 1, 21),
                                filter_shape=(nkerns[1], nkerns[0], 1, 6),
                                poolsize=(1, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_features).  This will generate a matrix of
    # shape (batch_size, nkerns[1] * 1 * 8).
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 8,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the
    # updates list by automatically looping over all (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000               # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                   # go through this many minibatches before
                                   # checking the network on the validation set;
                                   # in this case we check every epoch
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)
            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' + os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
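# A minimal sketch of how the `datasets` argument above could be prepared and
# the function invoked.  The random 47-feature arrays and 10-class labels are
# assumptions purely for illustration; a real caller would substitute its own
# data (each row a 47-element feature vector).
import numpy
import theano
import theano.tensor as T

def make_shared_xy(data_x, data_y):
    """Wrap numpy arrays as the (x, y) shared-variable pair the code expects."""
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                             borrow=True)
    shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
                             borrow=True)
    return shared_x, T.cast(shared_y, 'int32')

def demo_datasets(n_per_split=100, seed=0):
    rng = numpy.random.RandomState(seed)
    splits = []
    for _ in range(3):  # train / valid / test
        x = rng.rand(n_per_split, 47)
        y = rng.randint(0, 10, size=n_per_split)
        splits.append(make_shared_xy(x, y))
    return splits

# evaluate_lenet5(demo_datasets(), n_epochs=1, batch_size=2)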
def evaluate_lenet5(learning_rate=0.1, n_epochs=200, dataset='mnist.pkl.gz',
                    nkerns=[20, 50], batch_size=500):
    """
    learning_rate (type: float; content: learning rate used (factor for the
        stochastic gradient))
    n_epochs (type: int; content: maximal number of epochs to run the optimizer)
    dataset (type: string; content: path to the dataset used for training/testing
        (MNIST here))
    nkerns (type: list of ints; content: number of kernels on each layer)
    """

    # Initialise random number generator (used to initialise weights)
    rng = numpy.random.RandomState(23455)

    ## --------------------------------------------------------------------------------------
    ## Load MNIST data (using load_data() [defined above], and the dataset path)
    ## --------------------------------------------------------------------------------------
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]   # divided into training set...
    valid_set_x, valid_set_y = datasets[1]   # validation set
    test_set_x, test_set_y = datasets[2]     # and test set

    # Compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    #########################################################################################
    # BUILD THE MODEL #
    #########################################################################################
    print('... building the model')

    # Allocate (initialise) symbolic variables for the input (x and y represent a minibatch)
    index = T.lscalar()  # index to a [mini]batch (lscalar() returns a zero-dimension value)
    x = T.matrix('x')    # data, presented as rasterized images
    y = T.ivector('y')   # labels, presented as a 1D vector of [int] labels

    ## --------------------------------------------------------------------------------------
    ## Define the FIRST layer
    ## --------------------------------------------------------------------------------------
    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28) to a 4D tensor,
    # compatible with our LeNetConvPoolLayer. (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    ## --------------------------------------------------------------------------------------
    ## Define the SECOND layer
    ## --------------------------------------------------------------------------------------
    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    ## --------------------------------------------------------------------------------------
    ## Define the THIRD layer
    ## --------------------------------------------------------------------------------------
    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=T.tanh
    )

    ## --------------------------------------------------------------------------------------
    ## Define the FOURTH layer
    ## --------------------------------------------------------------------------------------
    # Classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    ## --------------------------------------------------------------------------------------
    ## Define cost and test functions
    ## --------------------------------------------------------------------------------------
    cost = layer3.negative_log_likelihood(y)  # Calculate the cost (negative_log_likelihood)

    # Compile a Theano function that computes the mistakes made by the model on a minibatch,
    # both for the test model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # and for the validation model
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # Create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # Create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    ## Specify how to update the parameters of the model
    """
    train_model is a function that updates the model parameters by SGD.
    Since this model has many parameters, it would be tedious to manually create
    an update rule for each model parameter. We thus create the updates list by
    automatically looping over all (params[i], grads[i]) pairs.
    """
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    # Compile a Theano function `train_model` that returns the cost, but at the same time
    # updates the parameters of the model based on the rules defined in `updates`.
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    #########################################################################################
    # TRAIN MODEL #
    #########################################################################################
    print('... training the model')

    ## --------------------------------------------------------------------------------------
    ## Define early-stopping parameters
    ## --------------------------------------------------------------------------------------
    patience = 10000               # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    validation_frequency = min(n_train_batches, patience // 2)
        # go through this many minibatches before checking the network
        # on the validation set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    ## --------------------------------------------------------------------------------------
    ## Start iterating loop (i.e. through minibatches for repeated SGD)
    ## --------------------------------------------------------------------------------------
    epoch = 0
    done_looping = False

    # Loop through epochs
    while (epoch < n_epochs) and (not done_looping):
        # n_epochs is defined in the signature of this large function
        epoch = epoch + 1  # Increment epoch on each loop

        # Loop through minibatches
        for minibatch_index in range(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index  # iteration number

            ## On every 100 iterations...
            if iter % 100 == 0:
                print('training @ iter = ', iter)

            cost_ij = train_model(minibatch_index)

            # When the iteration is fully divisible by the validation frequency
            if (iter + 1) % validation_frequency == 0:
                # Check the performance (zero-one loss) on the validation data set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                # Print current validation results
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # If we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # ...and if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # Save the best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # Test it on the test set
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    # Print test results
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

                    ## -----------------------------------------------------------------
                    ## Save model parameters using cPickle
                    ## -----------------------------------------------------------------
                    fname = 'bestCNNModel.pkl'
                    saveFile = open(fname, 'wb')
                    # model weights
                    cPickle.dump(layer0.W, saveFile)
                    cPickle.dump(layer0.b, saveFile)
                    cPickle.dump(layer1.W, saveFile)
                    cPickle.dump(layer1.b, saveFile)
                    cPickle.dump(layer2.W, saveFile)
                    cPickle.dump(layer2.b, saveFile)
                    """
                    # hyperparameters and performance
                    cPickle.dump(learning_rate, saveFile)
                    cPickle.dump(best_validation_loss, saveFile)
                    cPickle.dump(test_score, saveFile)
                    cPickle.dump(test_losses, saveFile)
                    cPickle.dump(nkerns, saveFile)
                    cPickle.dump(n_epochs, saveFile)
                    cPickle.dump(batch_size, saveFile)
                    """
                    saveFile.close()

            # Else, if patience is expired
            if patience <= iter:
                done_looping = True  # Break the loop
                break

    # Now that the loop has ended...
    end_time = timeit.default_timer()  # note the time of loop ending

    # Print the final results
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' + os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
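# A minimal sketch of reading back the parameters saved to 'bestCNNModel.pkl'
# above.  cPickle.load() must be called in exactly the same order as the dumps
# (layer0.W, layer0.b, layer1.W, layer1.b, layer2.W, layer2.b); each load
# returns the corresponding Theano shared variable.
import cPickle

def load_best_cnn_params(fname='bestCNNModel.pkl'):
    params = {}
    with open(fname, 'rb') as f:
        for name in ['layer0_W', 'layer0_b', 'layer1_W',
                     'layer1_b', 'layer2_W', 'layer2_b']:
            params[name] = cPickle.load(f)
    return params

# Example:
#   params = load_best_cnn_params()
#   params['layer0_W'].get_value().shape   # e.g. (nkerns[0], 1, 5, 5)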