# Assumed imports for these snippets (Theano-era, Python 2 code).
# SymbolLayer, LSTM, GLSTM, LCollect, LPush, LPop, dtypeX and nonlinear are
# helpers from the surrounding repo (layerbase / fractallayer).
import theano
import theano.tensor as T


def LSTMScanArrayToArray(rng, inlayer, szhidden, outf=T.tanh, backwards=False):
    # Scan a single LSTM cell over the time axis of inlayer, one step per row.
    def oneStep(inp, laststate, lastout):
        inl = SymbolLayer(inp, (1, inlayer.output_shape[1]))
        lstmout = LCollect(LSTM(rng, inl, laststate, lastout, szhidden, outf=outf))
        return lstmout.hidden, lstmout.output

    LPush()
    firsthidden = T.alloc(dtypeX(0), 1, szhidden)
    firstout = T.alloc(dtypeX(0), 1, szhidden)
    (hiddens, outs), updates = theano.scan(fn=oneStep,
                                           outputs_info=[firsthidden, firstout],
                                           sequences=inlayer.output,
                                           go_backwards=backwards)
    lstml = LPop()[0]
    return SymbolLayer(outs.reshape((inlayer.output_shape[0], szhidden)),
                       (inlayer.output_shape[0], szhidden)), lstml
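# A minimal calling-convention sketch for the scan wrapper above. `rng`,
# `SymbolLayer` and the LSTM machinery come from the surrounding repo, and
# `x`, `seqlen`, `nfeat` are hypothetical names, so this only illustrates the
# intended shapes rather than being a standalone program:
#
#   inl = SymbolLayer(x, (seqlen, nfeat))              # x: (seqlen, nfeat) matrix
#   outlayer, lstml = LSTMScanArrayToArray(rng, inl, szhidden=64)
#   # outlayer.output_shape == (seqlen, 64); lstml carries the LSTM parameters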
def __init__(self, input, total, current):
    # Adaptive 1-D mean-pooling constructor: layer `current` of a stack of
    # `total` such layers, sized so the stack shrinks the spatial axis to ~1.
    assert len(input.output_shape) == 3
    # Symbolic pool size: the (total-current+1)-th root of the spatial extent
    self.poolsize = poolsize = T.cast(
        T.floor(input.output_shape[2] ** (1.0 / (total - current + 1))), 'int32')
    self.poolresp = poolresp = T.cast(
        (input.output_shape[2] + poolsize - 1) // poolsize, 'int32')
    # Zero-pad the spatial axis up to a whole number of pools
    rectinput = T.alloc(dtypeX(0), input.output_shape[0], input.output_shape[1],
                        poolsize * poolresp)
    rectinput = T.set_subtensor(rectinput[:, :, :input.output_shape[2]], input.output)
    rectinput = rectinput.reshape((input.output_shape[0], input.output_shape[1],
                                   poolresp, poolsize))
    # Reshape and mean the pool axis out
    self.output = T.mean(rectinput, 3)
    # The last pool is only partially filled: average over its valid part only
    self.output = T.set_subtensor(
        self.output[:, :, -1],
        T.mean(rectinput[:, :, -1, :input.output_shape[2] - (poolresp - 1) * poolsize], 2))
    self.output_shape = (input.output_shape[0], input.output_shape[1], poolresp)
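# The pool size above is the (total-current+1)-th root of the spatial extent,
# so that `total` stacked layers reduce any length to roughly 1. A quick
# numeric check of that schedule in plain Python (no Theano; `pool_schedule`
# is a hypothetical helper written for illustration only):
import math

def pool_schedule(n, total):
    sizes = []
    for current in range(1, total + 1):
        poolsize = max(int(math.floor(n ** (1.0 / (total - current + 1)))), 1)
        n = (n + poolsize - 1) // poolsize   # ceil-divide: number of pools left
        sizes.append((poolsize, n))
    return sizes

# e.g. pool_schedule(100, 2) -> [(10, 10), (10, 1)]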
def __init__(self, input, orig):
    # Expand counterpart of ShrinkShapeFractal1D: restore the original length
    # by writing the shrunk signal to even indices and repeating it on the
    # odd ones (a nearest-neighbour upsample).
    assert isinstance(orig, ShrinkShapeFractal1D)
    self.output_shape = orig.origlayer.output_shape
    self.output = T.alloc(dtypeX(0), orig.origlayer.output.shape[0],
                          orig.origlayer.output.shape[1])
    self.output = T.set_subtensor(self.output[::2], input.output)
    self.output = T.set_subtensor(self.output[1::2],
                                  input.output[:orig.origlayer.output.shape[0] // 2])
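# What the expand step does, in plain numpy (a minimal sketch with made-up
# data; the real code does the same with symbolic set_subtensor):
import numpy as np

shrunk = np.array([[0.], [1.], [2.]])   # ceil(5/2) = 3 rows from the shrink step
out = np.zeros((5, 1))
out[::2] = shrunk                        # rows 0, 2, 4
out[1::2] = shrunk[:5 // 2]              # rows 1, 3 repeat the first two rows
# out.ravel() -> [0., 0., 1., 1., 2.]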
def __init__(self, rng, originput, lastinput, hiddenl, hiddenr, stepl, stepr,
             outputs=None, sharedlayers=None, nlscan='tanh', nlout='tanh'):
    # Bidirectional context layer: left and right context are built by
    # repeatedly shifting a state tensor along time instead of theano.scan.
    if hiddenr is None:
        hiddenr = hiddenl
    if stepr is None:
        stepr = stepl
    assert len(originput.output_shape) == 3
    assert lastinput is None or originput.output_shape[0] == lastinput.output_shape[0]
    ilayers = originput.output_shape[1]
    # Reuse weights from a shared layer when one is given
    wd = sharedlayers.__dict__ if sharedlayers is not None else {}
    self.Win_hl = Win_hl = wd.get('Win_hl') or self.RNG_GEN(rng, ilayers, hiddenl)
    self.Win_hr = Win_hr = wd.get('Win_hr') or self.RNG_GEN(rng, ilayers, hiddenr)
    self.Wprev = Wprev = wd.get('Wprev') or self.RNG_GEN(rng, hiddenl, hiddenl)
    self.Wnext = Wnext = wd.get('Wnext') or self.RNG_GEN(rng, hiddenr, hiddenr)
    self.bl = bl = wd.get('bl') or self.ZERO_GEN(hiddenl)
    self.br = br = wd.get('br') or self.ZERO_GEN(hiddenr)
    if lastinput is not None:
        ilast = lastinput.output_shape[1]
        self.Wlastl = Wlastl = wd.get('Wlastl') or self.RNG_GEN(rng, ilast, hiddenl)
        self.Wlastr = Wlastr = wd.get('Wlastr') or self.RNG_GEN(rng, ilast, hiddenr)
    if outputs is not None:
        self.Woutput = Woutput = wd.get('Woutput') or self.RNG_GEN(rng, hiddenl + hiddenr, outputs)
        self.boutput = boutput = wd.get('boutput') or self.ZERO_GEN(outputs)
    else:
        # An aggregated output solution instead of a projection
        pass
    vl0 = self.ZERO_GEN_SYMBOL(originput.output_shape[0], hiddenl)  # not used below
    vr0 = self.ZERO_GEN_SYMBOL(originput.output_shape[0], hiddenr)  # not used below
    # Real work
    self.odim = odim = (originput.output_shape[1], originput.output_shape[0])
    ldim = None
    self.odl = odl = (hiddenl, originput.output_shape[0])
    self.odr = odr = (hiddenr, originput.output_shape[0])
    self.T_orig_o = T_orig_o = originput.output.dimshuffle(2, 0, 1)
    if lastinput is not None:
        self.ldim = ldim = (lastinput.output_shape[1], lastinput.output_shape[0])
        self.T_last_o = T_last_o = lastinput.output.dimshuffle(2, 0, 1)
        # Left-to-right context: each iteration shifts the state one step and
        # mixes it with the input and the previous-scale input
        statL = T.alloc(dtypeX(0), originput.output_shape[2], originput.output_shape[0], hiddenl)
        for steps in range(stepl):
            if steps != 0:
                statL = T.set_subtensor(statL[1:], statL[:-1])
                statL = T.set_subtensor(statL[0], dtypeX(0))
            t = (T.tensordot(statL, Wprev, [[2], [1]])
                 + T.tensordot(T_orig_o, Win_hl, [[2], [1]])
                 + T.tensordot(T_last_o, Wlastl, [[2], [1]]))
            statL = nonlinear(t + bl.dimshuffle('x', 'x', 0), nlscan)
        self.currl = currl = statL
        # Right-to-left context: same scheme, shifting the other way
        statR = T.alloc(dtypeX(0), originput.output_shape[2], originput.output_shape[0], hiddenr)
        for steps in range(stepr):
            if steps != 0:
                statR = T.set_subtensor(statR[:-1], statR[1:])
                statR = T.set_subtensor(statR[-1], dtypeX(0))
            t = (T.tensordot(statR, Wnext, [[2], [1]])
                 + T.tensordot(T_orig_o, Win_hr, [[2], [1]])
                 + T.tensordot(T_last_o, Wlastr, [[2], [1]]))
            statR = nonlinear(t + br.dimshuffle('x', 'x', 0), nlscan)
        self.currr = currr = statR
    else:
        # Same two context scans, without the previous-scale term
        statL = T.alloc(dtypeX(0), originput.output_shape[2], originput.output_shape[0], hiddenl)
        for steps in range(stepl):
            if steps != 0:
                statL = T.set_subtensor(statL[1:], statL[:-1])
                statL = T.set_subtensor(statL[0], dtypeX(0))
            t = (T.tensordot(statL, Wprev, [[2], [1]])
                 + T.tensordot(T_orig_o, Win_hl, [[2], [1]]))
            statL = nonlinear(t + bl.dimshuffle('x', 'x', 0), nlscan)
        self.currl = currl = statL
        statR = T.alloc(dtypeX(0), originput.output_shape[2], originput.output_shape[0], hiddenr)
        for steps in range(stepr):
            if steps != 0:
                statR = T.set_subtensor(statR[:-1], statR[1:])
                statR = T.set_subtensor(statR[-1], dtypeX(0))
            t = (T.tensordot(statR, Wnext, [[2], [1]])
                 + T.tensordot(T_orig_o, Win_hr, [[2], [1]]))
            statR = nonlinear(t + br.dimshuffle('x', 'x', 0), nlscan)
        self.currr = currr = statR
    # Make output
    aggout = T.concatenate([currl, currr], axis=2)
    if outputs is not None:
        # Project through another layer and transpose back to (batch, feat, time)
        self.output = nonlinear(T.tensordot(aggout, Woutput, [[2], [1]])
                                + boutput.dimshuffle('x', 'x', 0), nlout).dimshuffle(1, 2, 0)
        self.output_shape = [originput.output_shape[0], outputs, originput.output_shape[2]]
    else:
        self.output = aggout.dimshuffle(1, 2, 0)
        self.output_shape = [originput.output_shape[0], hiddenl + hiddenr, originput.output_shape[2]]
    self.params = [Win_hl, Win_hr, Wprev, Wnext, bl, br]
    if lastinput is not None:
        self.params.extend([Wlastl, Wlastr])
    if outputs is not None:
        self.params.extend([Woutput, boutput])
    if sharedlayers is not None:
        # Shared parameters must not be counted (and updated) twice
        for i in sharedlayers.params:
            if i in self.params:
                self.params.remove(i)
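# The context scans above avoid theano.scan: each of the `stepl` iterations
# shifts the state one position along time and re-mixes it with the input, so
# position t accumulates at most `stepl` steps of left context. A plain-numpy
# sketch of the left scan, assuming (time, batch, feat) input, (hidden, feat)
# and (hidden, hidden) weights, and identity nonlinearity for brevity:
import numpy as np

def left_context_scan(x, Win, Wprev, b, steps):
    stat = np.zeros(x.shape[:2] + (Wprev.shape[0],))
    for s in range(steps):
        if s != 0:
            shifted = np.zeros_like(stat)
            shifted[1:] = stat[:-1]       # shift state one step towards larger t
            stat = shifted                # position 0 sees no left context
        stat = (np.tensordot(stat, Wprev, ([2], [1]))
                + np.tensordot(x, Win, ([2], [1])) + b)
    return stat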
def ZERO_GEN_SYMBOL(self, *s):
    # Symbolic zero tensor of the given shape
    return T.alloc(dtypeX(0), *s)
def trainroutine(ftrain, model, savename, vispath, fdatagen, fvis=None,
                 fcheck=None, fcheckgen=None, TOLTIMES=5, BATCHSTEP=10,
                 LEARNRATEVAR=None, LEARNRATETARGET=10.0, LEARNADJUST=1.01,
                 remotemonitor=False, sameranks=[], longrangecheck=None,
                 longrangeperiod=None, totalsteps=None):
    global TRAINSETTINGS
    TRAINSETTINGS.TOLTIMES = TOLTIMES
    TRAINSETTINGS.BATCHSTEP = BATCHSTEP
    TRAINSETTINGS.LEARNRATEVAR = LEARNRATEVAR
    TRAINSETTINGS.LEARNRATETARGET = LEARNRATETARGET
    TRAINSETTINGS.LEARNADJUST = LEARNADJUST
    TRAINSETTINGS.TOTALSTEPS = totalsteps
    from layerbase import safefile
    import sys, os
    from fractallayer import dtypeX
    if remotemonitor is not False:
        import modelrecord
        if remotemonitor is None:
            # Was `remotemonitor.Record()`, which would call a method on None
            modrec = modelrecord.Record()
        elif isinstance(remotemonitor, tuple):
            modrec = modelrecord.Record(*remotemonitor)
        else:
            modrec = modelrecord.Record(remotemonitor)
        modrec.genmeta(model, sameranks)
    else:
        modrec = None
    with safefile(savename) as loadf:
        if loadf:
            model.load(loadf.rb())
    LOSS0 = 1e100
    tol = 0
    l = d = 0
    if vispath is not None:
        if not os.path.exists(vispath):
            os.mkdir(vispath)
        MPDrawInitializer(vispath)
    if isinstance(fdatagen, (str, tuple, list)):
        fdatagen = MPTwoAheadProducer(fdatagen)
    else:
        fdatagen = TwoAheadProducer(fdatagen)
    step = 0
    lrstep = 0
    if longrangecheck is not None and longrangeperiod is None:
        longrangeperiod = BATCHSTEP
    if longrangeperiod is not None:
        TRAINSETTINGS.LONGRANGEPERIOD = longrangeperiod
    while True:
        step += 1
        if TRAINSETTINGS.TOTALSTEPS is not None and step > TRAINSETTINGS.TOTALSTEPS:
            break
        # Retry until one training step succeeds; data errors are reported and
        # skipped, but interrupts and exits still propagate
        while True:
            try:
                gen = fdatagen()
                loss, upd = [float(t) for t in ftrain(*gen)]
                break
            except KeyboardInterrupt:
                raise
            except SystemExit:
                raise
            except:
                import traceback
                traceback.print_exc()
                sys.stdout.write('*')
                continue
        l += loss
        d += upd
        sys.stdout.write('.')
        sys.stdout.flush()
        if step % TRAINSETTINGS.BATCHSTEP == TRAINSETTINGS.BATCHSTEP - 1:
            print d, l,
            if TRAINSETTINGS.LEARNRATEVAR is not None:
                # Multiplicative learning-rate control: shrink the rate when
                # the accumulated update magnitude overshoots the target
                lval = TRAINSETTINGS.LEARNRATEVAR.get_value()
                if d > TRAINSETTINGS.LEARNRATETARGET * TRAINSETTINGS.BATCHSTEP:
                    lval /= TRAINSETTINGS.LEARNADJUST
                else:
                    lval *= TRAINSETTINGS.LEARNADJUST
                TRAINSETTINGS.LEARNRATEVAR.set_value(dtypeX(lval))
                print lval,
            if modrec is not None:
                modrec.R()
                modrec.Rlt(l)
                modrec.Rd()
            l = d = 0
            if vispath is not None:
                print "DRAW"
                #Draw model
                drawlayers = []
                layer = 0
                for i in model.paramlayers():
                    if len(i.params) < 1:
                        continue
                    if len(i.params) > 2:
                        if hasattr(i, 'reshape'):
                            reshape = i.reshape
                        else:
                            reshape = [None] * len(i.params)
                        for j, rj in zip(i.params, reshape):
                            s = j.get_value()
                            # Use a fresh name: a Py2 list comprehension would
                            # otherwise clobber the loop variable `i`
                            vsh = [sz for sz in s.shape if sz > 1]
                            if len(vsh) < 2:
                                continue
                            layer += 1
                            drawlayers.append((layer, s, rj))
                    else:
                        layer += 1
                        drawlayers.append((layer, i.params[0].get_value(),
                                           i.reshape if hasattr(i, 'reshape') and i.reshape is not None else None))
                resplayers = fvis(*gen) if fvis is not None else []
                MPDrawWriter(drawlayers, resplayers)
            else:
                print
            #Check validset
            if fcheckgen is not None and fcheck is not None:
                LOSS1 = 0.0
                for j in fcheckgen():
                    sys.stdout.write('.')
                    sys.stdout.flush()
                    LOSS1 += fcheck(*j)
                print LOSS1
                if modrec is not None:
                    modrec.Rlv(LOSS1)
                if LOSS1 > LOSS0:
                    print "Converge on validset"
                    tol += 1
                    if tol > TRAINSETTINGS.TOLTIMES:
                        sys.exit(0)
                else:
                    tol = 0
                    print "NEW LOSS", LOSS1
                    LOSS0 = LOSS1
            if longrangecheck is not None:
                lrstep += 1
                if lrstep % TRAINSETTINGS.LONGRANGEPERIOD == TRAINSETTINGS.LONGRANGEPERIOD - 1:
                    try:
                        result = longrangecheck()
                        if modrec is not None:  # guard: modrec may be disabled
                            modrec.Rfloat(result)
                    except KeyboardInterrupt:
                        raise
                    except SystemExit:
                        raise
                    except:
                        import traceback
                        traceback.print_exc()
            #Commit
            if modrec is not None:
                modrec.C()
            #Save model
            with safefile(savename) as savef:
                model.save(savef.wb())
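# The learning-rate control above restated on its own: `d` accumulates the
# update magnitude over BATCHSTEP steps; overshooting the per-step target
# shrinks the rate, undershooting grows it. A minimal sketch whose names
# mirror the TRAINSETTINGS fields (the helper itself is illustrative only):
def adjust_learning_rate(lval, d, batchstep, target=10.0, adjust=1.01):
    if d > target * batchstep:
        return lval / adjust   # updates too large: damp the rate
    return lval * adjust       # otherwise creep the rate upward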
def BlockGLSTMScanArrayToArray(rng, inlayer, szgate, szhidden, blocksize=10,
                               warmup=10, outf=T.tanh, noot=False,
                               backwards=False, shareLayer=None,
                               warmupHidden=None, warmupOut=None):
    # Run a gated LSTM over the sequence in parallel blocks: the sequence is
    # cut into `totblks` blocks scanned simultaneously, each warmed up on the
    # last `warmup` steps of its predecessor.
    if backwards:
        inout = inlayer.output[::-1]
    else:
        inout = inlayer.output
    if warmupHidden is not None:
        if backwards:
            whid = warmupHidden.output[::-1]
        else:
            whid = warmupHidden.output
    if warmupOut is not None:
        if backwards:
            wout = warmupOut.output[::-1]
        else:
            wout = warmupOut.output
    #PrepareData
    totblks = (inlayer.output.shape[0] + blocksize - 1) // blocksize

    def oneStep(inp, laststate, lastout):
        inl = SymbolLayer(inp, (totblks, inlayer.output_shape[1]))
        lstmout = LCollect(GLSTM(rng, inl, laststate, lastout, szgate, szhidden,
                                 outf=outf, noot=noot, shareLayer=shareLayer))
        return lstmout.hidden, lstmout.output

    stackinp = T.alloc(dtypeX(0), totblks, blocksize + warmup, inlayer.output_shape[1])
    #Fill block data
    stackinp = T.set_subtensor(
        stackinp[:-1, warmup:],
        inout[:(totblks - 1) * blocksize].reshape(
            (totblks - 1, blocksize, inlayer.output.shape[1])))
    stackinp = T.set_subtensor(
        stackinp[-1, warmup:warmup + inlayer.output.shape[0] - (totblks - 1) * blocksize],
        inout[(totblks - 1) * blocksize:].reshape(
            (inlayer.output.shape[0] - (totblks - 1) * blocksize,
             inlayer.output.shape[1])))
    #Fill block warmup data: each block's prefix is the end of the previous block
    stackinp = T.set_subtensor(stackinp[1:, :warmup], stackinp[:-1, -warmup:])
    stackinp = stackinp.dimshuffle(1, 0, 2)
    LPush()
    # Initial per-block state/output, seeded from the warmup layers when given
    firsthidden = T.alloc(dtypeX(0), totblks, szhidden)
    if warmupHidden is not None:
        firsthidden = T.set_subtensor(
            firsthidden[warmup // blocksize + 1:],
            whid[-warmup + blocksize * (warmup // blocksize + 1):
                 -warmup + blocksize * totblks:blocksize])
    firstout = T.alloc(dtypeX(0), totblks, szhidden)
    if warmupOut is not None:
        firstout = T.set_subtensor(
            firstout[warmup // blocksize + 1:],
            wout[-warmup + blocksize * (warmup // blocksize + 1):
                 -warmup + blocksize * totblks:blocksize])
    (hiddens, outs), updates = theano.scan(fn=oneStep,
                                           outputs_info=[firsthidden, firstout],
                                           sequences=stackinp)
    lstml = LPop()[0]
    #ExpandData: drop the warmup steps and flatten blocks back to a sequence
    hiddens = hiddens.dimshuffle(1, 0, 2)
    hiddens = hiddens[:, warmup:].reshape(
        (totblks * blocksize, szhidden))[:inlayer.output.shape[0]]
    outs = outs.dimshuffle(1, 0, 2)
    outs = outs[:, warmup:].reshape(
        (totblks * blocksize, szhidden))[:inlayer.output.shape[0]]
    if backwards:
        hiddens = hiddens[::-1]
        outs = outs[::-1]
    global extraHid
    extraHid = SymbolLayer(hiddens, (inlayer.output_shape[0], szhidden))
    return SymbolLayer(outs, (inlayer.output_shape[0], szhidden)), lstml
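# The packing trick above, in plain numpy: a length-n sequence is cut into
# `totblks` blocks of `blocksize` steps, each prefixed with the last `warmup`
# steps of its predecessor so every block can warm its state up before its
# own outputs are kept. A minimal sketch (`pack_blocks` is illustrative, and
# assumes warmup <= blocksize so the prefix is pure data):
import numpy as np

def pack_blocks(x, blocksize, warmup):
    n, feat = x.shape
    totblks = (n + blocksize - 1) // blocksize
    stack = np.zeros((totblks, blocksize + warmup, feat), x.dtype)
    # block data
    stack[:-1, warmup:] = x[:(totblks - 1) * blocksize].reshape(totblks - 1, blocksize, feat)
    tail = n - (totblks - 1) * blocksize
    stack[-1, warmup:warmup + tail] = x[(totblks - 1) * blocksize:]
    # warmup prefix = end of the previous block
    stack[1:, :warmup] = stack[:-1, -warmup:]
    return stack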
def BlockLSTMUnrollArrayToArray(rng, inlayer, szhidden, blocksize=10, warmup=10,
                                outf=T.tanh, noot=False, backwards=False,
                                shareLayer=None, warmupHidden=None, warmupOut=None):
    # Same blocked evaluation as BlockGLSTMScanArrayToArray, but the
    # warmup+blocksize steps are unrolled into the graph instead of theano.scan.
    if backwards:
        inout = inlayer.output[::-1]
    else:
        inout = inlayer.output
    if warmupHidden is not None:
        if backwards:
            whid = warmupHidden.output[::-1]
        else:
            whid = warmupHidden.output
    if warmupOut is not None:
        if backwards:
            wout = warmupOut.output[::-1]
        else:
            wout = warmupOut.output
    #PrepareData
    totblks = (inlayer.output.shape[0] + blocksize - 1) // blocksize

    def oneStep(inp, laststate, lastout):
        inl = SymbolLayer(inp, (totblks, inlayer.output_shape[1]))
        lstmout = LSTM(rng, inl, laststate, lastout, szhidden,
                       outf=outf, noot=noot, shareLayer=shareLayer)
        return lstmout.hidden, lstmout.output, lstmout

    stackinp = T.alloc(dtypeX(0), totblks, blocksize + warmup, inlayer.output_shape[1])
    #Fill block data
    stackinp = T.set_subtensor(
        stackinp[:-1, warmup:],
        inout[:(totblks - 1) * blocksize].reshape(
            (totblks - 1, blocksize, inlayer.output.shape[1])))
    stackinp = T.set_subtensor(
        stackinp[-1, warmup:warmup + inlayer.output.shape[0] - (totblks - 1) * blocksize],
        inout[(totblks - 1) * blocksize:].reshape(
            (inlayer.output.shape[0] - (totblks - 1) * blocksize,
             inlayer.output.shape[1])))
    #Fill block warmup data
    stackinp = T.set_subtensor(stackinp[1:, :warmup], stackinp[:-1, -warmup:])
    stackinp = stackinp.dimshuffle(1, 0, 2)
    # Initial per-block state/output, seeded from the warmup layers when given
    firsthidden = T.alloc(dtypeX(0), totblks, szhidden)
    if warmupHidden is not None:
        firsthidden = T.set_subtensor(
            firsthidden[warmup // blocksize + 1:],
            whid[-warmup + blocksize * (warmup // blocksize + 1):
                 -warmup + blocksize * totblks:blocksize])
    firstout = T.alloc(dtypeX(0), totblks, szhidden)
    if warmupOut is not None:
        firstout = T.set_subtensor(
            firstout[warmup // blocksize + 1:],
            wout[-warmup + blocksize * (warmup // blocksize + 1):
                 -warmup + blocksize * totblks:blocksize])
    hiddens = []
    outs = []
    firstshare = None
    # Unroll: warmup steps prime the state and their outputs are discarded.
    # After the first step, `shareLayer` is rebound to the first LSTM instance
    # so every later unrolled step shares its weights.
    for i in range(warmup):
        firsthidden, firstout, shareLayer = oneStep(stackinp[i], firsthidden, firstout)
        if firstshare is None:
            firstshare = shareLayer
    for i in range(blocksize):
        firsthidden, firstout, shareLayer = oneStep(stackinp[i + warmup], firsthidden, firstout)
        if firstshare is None:
            firstshare = shareLayer
        hiddens.append(firsthidden)
        outs.append(firstout)
    hiddens = T.stack(*hiddens)
    outs = T.stack(*outs)
    #ExpandData (the warmup steps were already eaten by the unrolled loop)
    hiddens = hiddens.dimshuffle(1, 0, 2)
    hiddens = hiddens.reshape((totblks * blocksize, szhidden))[:inlayer.output.shape[0]]
    outs = outs.dimshuffle(1, 0, 2)
    outs = outs.reshape((totblks * blocksize, szhidden))[:inlayer.output.shape[0]]
    if backwards:
        hiddens = hiddens[::-1]
        outs = outs[::-1]
    global extraHid
    extraHid = SymbolLayer(hiddens, (inlayer.output_shape[0], szhidden))
    return SymbolLayer(outs, (inlayer.output_shape[0], szhidden)), firstshare
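# Inverse of the packing for the unrolled variant: the stacked outputs come
# back as (blocksize, totblks, hidden) with warmup steps already dropped, and
# are flattened back to original sequence order. A numpy sketch of that
# unpacking (`unpack_blocks` is illustrative; `outs` is a numpy array here):
def unpack_blocks(outs, n):
    # outs: (blocksize, totblks, hidden) -> (n, hidden)
    blocksize, totblks, hidden = outs.shape
    return outs.transpose(1, 0, 2).reshape(totblks * blocksize, hidden)[:n]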