#     rng=npy_rng
# )

print "\n\n... fine-tuning the whole network"
init_lr = trainer.learningrate
prev_cost = numpy.inf
epc_cost = 0.
patience = 0
avg = 50
crnt_avg = [numpy.inf, ] * avg
hist_avg = [numpy.inf, ] * avg
for step in xrange(finetune_epc * 50000 / batchsize):
    # learn
    # if (step - 1) % 500 == 0:
    #     print "normal cost: ",
    cost = trainer.step_fast(verbose_stride=500)
    # if (step - 1) % 500 == 0:
    #     print "gradient cost: ",
    # cost_grad = trainer2.step_fast(verbose_stride=500)
    apply_mask[0]()
    apply_mask[1]()
    # apply_mask[2]()
    epc_cost += cost
    if step % (50000 / batchsize) == 0 and step > 0:
        # stop rule: compare the window of the last `avg` epoch costs
        # against the window of the `avg` epochs before it
        ind = (step / (50000 / batchsize)) % avg
        hist_avg[ind] = crnt_avg[ind]
        crnt_avg[ind] = epc_cost
        if sum(hist_avg) < sum(crnt_avg):
            break
        epc_cost = 0.  # reset the per-epoch cost accumulator
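# The stop rule above keeps two rolling windows of per-epoch costs: crnt_avg
# holds the most recent `avg` epochs and hist_avg the `avg` epochs before
# them; training halts once the older window's total drops below the newer
# one's, i.e. once the cost has stopped improving over a full window. A
# minimal self-contained sketch of that rule (the function name and the
# cost-stream argument are illustrative, not part of this script):

def window_stop_rule(epoch_costs, avg=50):
    """Return the epoch at which the rolling-window rule fires, else None."""
    crnt = [float('inf')] * avg
    hist = [float('inf')] * avg
    for epoch, cost in enumerate(epoch_costs):
        ind = epoch % avg
        hist[ind] = crnt[ind]      # value recorded `avg` epochs earlier
        crnt[ind] = cost           # this epoch's cost
        if sum(hist) < sum(crnt):  # older window was cheaper: no progress
            return epoch
    return None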
# Variant: the same fine-tuning loop, with the gradient-cost trainer
# (trainer2) enabled and the first-layer mask re-applied between updates.
    rng=npy_rng
)

print "\n\n... fine-tuning the whole network"
init_lr = trainer.learningrate
prev_cost = numpy.inf
epc_cost = 0.
patience = 0
avg = 50
crnt_avg = [numpy.inf, ] * avg
hist_avg = [numpy.inf, ] * avg
for step in xrange(finetune_epc * 50000 / batchsize):
    # learn
    if (step - 1) % 500 == 0:
        print "normal cost: ",
    cost = trainer.step_fast(verbose_stride=500)
    if (step - 1) % 500 == 0:
        print "gradient cost: ",
    apply_mask_l0()
    cost_grad = trainer2.step_fast(verbose_stride=500)
    # apply_mask[1]()
    # apply_mask[2]()
    epc_cost += cost
    if step % (50000 / batchsize) == 0 and step > 0:
        # stop rule: compare the window of the last `avg` epoch costs
        # against the window of the `avg` epochs before it
        ind = (step / (50000 / batchsize)) % avg
        hist_avg[ind] = crnt_avg[ind]
        crnt_avg[ind] = epc_cost
        if sum(hist_avg) < sum(crnt_avg):
            break
        epc_cost = 0.  # reset the per-epoch cost accumulator
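# apply_mask[...]() / apply_mask_l0() above re-impose a fixed binary mask on
# a layer's weight matrix after a parameter update, so that connections
# pruned to zero stay at zero between gradient steps. A hedged numpy sketch
# of that pattern (the real script presumably operates on Theano shared
# variables; make_apply_mask, W, and mask are illustrative names, not taken
# from this repo):

import numpy

def make_apply_mask(W, mask):
    """Return a closure that zeroes masked-out entries of W in place."""
    def _apply():
        W[...] *= mask  # elementwise product forces masked entries back to 0
    return _apply

# e.g. apply_mask_l0 = make_apply_mask(W_layer0, mask_layer0), then call
# apply_mask_l0() once after every trainer.step_fast(...) update.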