def do_training(args=None):
    """Train the good/bad particle classifier and write it to disk.

    Each entry of ``args`` is handed to ``EMData.read_images``; the two
    resulting image lists are taken as (good references, bad references),
    so ``args`` must yield exactly two items.

    Bad references are labelled 0 and good references 1.  Every reference
    is resampled, clipped to 64x64, band-pass filtered and replicated under
    random in-plane rotations before being fed to the network.

    Side effects: saves the trained network to ``nnet_classify.hdf`` and
    writes the first 100 training inputs together with the corresponding
    network outputs to ``trainout_nnet_classify.hdf`` (an existing file of
    that name is removed first).

    Returns ``[]`` when either reference list has fewer than 5 images;
    otherwise falls off the end and returns ``None``.
    """
    goodrefs, badrefs = [EMData.read_images(a) for a in args]

    model_file = "nnet_classify.hdf"
    trainout_file = "trainout_nnet_classify.hdf"
    net_size = 64
    box_size = goodrefs[0]["nx"]
    shrink = old_div(float(box_size), float(net_size))

    print("Setting up model ...")
    rng = np.random.RandomState(123)
    batch_size = 10
    convnet = StackedConvNet(
        rng,
        nkernel=[20, 20, 1],
        ksize=[15, 15, 15],
        poolsz=[2, 1, 1],
        imageshape=(batch_size, 1, net_size, net_size),
    )
    # One scalar per image: the mean of the final layer's output map,
    # floored at zero.
    per_image_mean = T.mean(
        convnet.clslayer.get_image().reshape((batch_size, -1)), axis=1)
    convnet.sumout = T.maximum(0, per_image_mean)

    print("Pre-processing particles...")
    # Shrink every reference to net_size x net_size and replicate it with
    # random rotations; ncopy is chosen so that each class ends up with more
    # than nref_target samples.
    nref_target = 500
    samples = []   # flattened particle images
    targets = []   # matching class labels
    for label, refs in enumerate([badrefs, goodrefs]):
        nref = len(refs)
        if nref < 5:
            print("Not enough references. Please box at least 5 good and 5 bad reference...")
            return []

        ncopy = old_div(nref_target, nref) + 1
        for ref in refs:
            ptl = ref.process("math.fft.resample", {"n": shrink})
            ptl.clip_inplace(Region(0, 0, net_size, net_size))
            ptl.process_inplace("filter.highpass.gauss", {"cutoff_pixels": 2})
            ptl.process_inplace("filter.lowpass.gauss", {"cutoff_abs": 0.25})
            for _ in range(ncopy):
                rot = Transform()
                rot.set_rotation({
                    "type": "2d",
                    "alpha": np.random.random() * 360.0,
                })
                rotated = ptl.process("xform", {"transform": rot})
                samples.append(rotated.numpy().copy().flatten())
                targets.append(label)

    # Shuffle images and labels with one shared permutation.
    order = list(range(len(samples)))
    np.random.shuffle(order)
    data = np.asarray([samples[i] for i in order], dtype=theano.config.floatX)
    lbs = np.asarray([targets[i] for i in order], dtype=theano.config.floatX)

    # Scale by the mean per-image standard deviation, then clamp outliers.
    data /= np.mean(np.std(data, axis=1))
    clamp = 4.
    np.clip(data, -clamp, clamp, out=data)

    train_set_x = theano.shared(data, borrow=True)
    labels = theano.shared(lbs.astype(theano.config.floatX), borrow=True)

    print("Now Training...")
    classify = get_classify_func(convnet, train_set_x, labels, batch_size)
    learning_rate = 0.005
    weightdecay = 1e-5
    n_train_batches = old_div(len(data), batch_size)
    for epoch in range(20):
        # One full pass over the training set, collecting the cost per batch.
        costs = [
            classify(batch_index, lr=learning_rate, wd=weightdecay)
            for batch_index in range(n_train_batches)
        ]
        # The rate is decayed before it is reported.
        learning_rate *= .96
        print('Training epoch %d, cost ' % (epoch), end=' ')
        print(np.mean(costs), ", learning rate", learning_rate)

    save_model(convnet, model_file)

    # Run the trained network on the first tstsz training images and dump
    # inputs and outputs for inspection.
    tstsz = 100
    convnet.update_shape((tstsz, 1, net_size, net_size))
    test_cls = theano.function(
        inputs=[],
        outputs=convnet.clslayer.get_image(),
        givens={convnet.x: train_set_x[:tstsz]},
    )
    tstout = test_cls()

    if os.path.isfile(trainout_file):
        os.remove(trainout_file)

    for idx, out_maps in enumerate(tstout):
        source_img = from_numpy(
            train_set_x[idx].eval().reshape(net_size, net_size))
        # NOTE(review): index -1 presumably appends to the stack - confirm.
        source_img.write_image(trainout_file, -1)
        for out_map in out_maps:
            out_img = from_numpy(out_map)
            nx = out_img["nx"]
            corner = old_div(-nx, 2)
            # Pad to twice the width, then scale by 2.
            out_img = out_img.get_clip(Region(corner, corner, nx * 2, nx * 2))
            out_img.scale(2.)
            out_img.process_inplace("threshold.clampminmax.nsigma",
                                    {"nsigma": 4})
            out_img.write_image(trainout_file, -1)
def do_training(args=None):
    """Train the two-output particle-picking network and write it to disk.

    Each entry of ``args`` is handed to ``EMData.read_images``; the three
    resulting image lists are taken as (refs0, refs1, badrefs), so ``args``
    must yield exactly three items.

    Labels are 0 for bad references, 1 for ``refs0`` and 2 for ``refs1``.
    The training target for label 0 is all zeros; for labels 1 and 2 it is
    a unit-peak Gaussian placed in the first or the second half of the
    target vector respectively.

    Side effects: saves the trained network to ``nnet_pickptcls_2cls.hdf``
    and writes the first 100 training inputs together with the network
    outputs to ``trainout_nnet.hdf`` (an existing file of that name is
    removed first).

    Returns ``[]`` when the required dependencies cannot be imported or when
    any reference list has fewer than 5 images; otherwise returns ``None``.
    """
    refs0, refs1, badrefs = [EMData.read_images(a) for a in args]

    nnet_savename = "nnet_pickptcls_2cls.hdf"
    bxsz = refs0[0]["nx"]
    sz = 64
    shrinkfac = old_div(float(bxsz), float(sz))

    print("Importing dependencies...")
    if not hasattr(boxerConvNet, 'import_done'):
        if not boxerConvNet.do_import():
            print("Cannot import required dependencies..Stop.")
            # Actually stop: the original fell through and went on to build
            # the network without its dependencies.
            return []

    print("Setting up model ...")
    rng = np.random.RandomState(123)
    nkernel = [20, 20, 2]
    ksize = [15, 15, 15]
    poolsz = [2, 1, 1]
    batch_size = 10

    image_shape = (batch_size, 1, sz, sz)
    convnet = StackedConvNet(
        rng,
        nkernel=nkernel,
        ksize=ksize,
        poolsz=poolsz,
        imageshape=image_shape
    )

    print("Pre-processing particles...")
    #### here we shrink the particles so they are 64x64
    #### and duplicate so there are more than 500 particles of each class
    nref_target = 500
    data = []  ### particles in flattened numpy array
    lbs = []   ### class label of each particle
    for label, refs in enumerate([badrefs, refs0, refs1]):
        nref = len(refs)
        if nref < 5:
            print("Not enough references. Please box at least 5 good and 5 bad reference...")
            return []
        ncopy = old_div(nref_target, nref) + 1

        for pp in refs:
            ptl = pp.process("math.fft.resample", {"n": shrinkfac})
            ptl.clip_inplace(Region(0, 0, sz, sz))
            ptl.process_inplace("filter.highpass.gauss", {"cutoff_freq": 0.005})
            ptl.process_inplace("filter.lowpass.gauss", {"cutoff_freq": 0.05})
            for _ in range(ncopy):
                # Each copy gets an independent random in-plane rotation.
                tr = Transform()
                tr.set_rotation({"type": "2d", "alpha": np.random.random() * 360.0})
                img = ptl.process("xform", {"transform": tr})
                ar = img.numpy().copy()
                data.append(ar.flatten())
                lbs.append(label)

    # Shuffle particles and labels with the same permutation.
    rndid = list(range(len(data)))
    np.random.shuffle(rndid)
    data = [data[i] for i in rndid]
    lbs = [lbs[i] for i in rndid]

    # Scale by the global standard deviation and clamp to [-2, 2].
    data = np.asarray(data, dtype=theano.config.floatX)
    data /= np.std(data)
    data[data > 2.] = 2.
    data[data < -2.] = -2.
    lbs = np.asarray(lbs, dtype=int)
    train_set_x = theano.shared(data, borrow=True)

    #### make target output
    img = EMData(old_div(sz, 2), old_div(sz, 2))
    img.process_inplace("testimage.gaussian", {'sigma': 5.})
    img.div(img["maximum"])
    gaus = img.numpy().copy().flatten()
    gaus = gaus.astype(theano.config.floatX)
    zero = np.zeros_like(gaus, dtype=theano.config.floatX)
    g0 = np.hstack([gaus, zero])
    g1 = np.hstack([zero, gaus])
    z0 = np.hstack([zero, zero])
    # Row index == class label, so indexing by lbs yields one target per particle.
    lbarrs = np.array([z0, g0, g1])
    label_np = lbarrs[lbs]
    labels = theano.shared(label_np, borrow=True)

    print("Now Training...")
    classify = convnet.get_classify_func(train_set_x, labels, batch_size)
    learning_rate = 0.002
    weightdecay = 1e-5
    n_train_batches = old_div(len(data), batch_size)
    for epoch in range(20):
        # go through the training set
        c = []
        for batch_index in range(n_train_batches):
            err = classify(batch_index,
                           lr=learning_rate,
                           wd=weightdecay)
            c.append(err)

        # The rate is decayed once per epoch, before it is reported.
        learning_rate *= .96
        print('Training epoch %d, cost ' % (epoch), end=' ')
        print(np.mean(c), ", learning rate", learning_rate)

    save_model(convnet, nnet_savename)

    # Run the trained network on the first tstsz training images and dump
    # the inputs and outputs for inspection.
    tstsz = 100
    convnet.update_shape((tstsz, 1, sz, sz))
    test_cls = theano.function(
        inputs=[],
        outputs=convnet.clslayer.get_image(),
        givens={
            convnet.x: train_set_x[:tstsz]
        }
    )
    tstout = test_cls()
    trainoutfile = "trainout_nnet.hdf"
    if os.path.isfile(trainoutfile):
        os.remove(trainoutfile)

    for i, mm in enumerate(tstout):
        t = train_set_x[i].eval().reshape(sz, sz)
        img = from_numpy(t)
        img.process_inplace("normalize")
        # NOTE(review): index -1 presumably appends to the stack - confirm.
        img.write_image(trainoutfile, -1)
        for m in mm:
            img = from_numpy(m)
            nx = img["nx"]
            # Pad to twice the width, then scale by 2.
            img = img.get_clip(Region(old_div(-nx, 2), old_div(-nx, 2), nx * 2, nx * 2))
            img.scale(2.)
            img.mult(5)
            img.write_image(trainoutfile, -1)