Example #1
def BlockGLSTMScanArrayToArray(rng, inlayer, szgate, szhidden, blocksize = 10, warmup = 10, outf = T.tanh, noot = False, backwards = False, shareLayer = None, warmupHidden = None, warmupOut = None):
    if backwards:
        inout = inlayer.output[::-1]
    else:
        inout = inlayer.output
    
    if warmupHidden is not None:
        if backwards:
            whid = warmupHidden.output[::-1]
        else:
            whid = warmupHidden.output

    if warmupOut is not None:
        if backwards:
            wout = warmupOut.output[::-1]
        else:
            wout = warmupOut.output

    # Prepare data
    totblks = (inlayer.output.shape[0] + blocksize - 1) // blocksize
    def oneStep(inp, laststate, lastout):
        inl = SymbolLayer(inp, (totblks,inlayer.output_shape[1]))
        lstmout = LCollect(GLSTM(rng, inl, laststate, lastout, szgate, szhidden, outf = outf, noot = noot, shareLayer = shareLayer))
        return lstmout.hidden, lstmout.output

    stackinp = T.alloc(dtypeX(0), totblks, blocksize+warmup, inlayer.output_shape[1])

    #Fill block data
    stackinp = T.set_subtensor(stackinp[:-1,warmup:],inout[:(totblks-1)*blocksize].reshape((totblks-1, blocksize, inlayer.output.shape[1])))
    stackinp = T.set_subtensor(stackinp[-1,warmup:warmup+inlayer.output.shape[0]-(totblks-1)*blocksize],inout[(totblks-1)*blocksize:].reshape((inlayer.output.shape[0]-(totblks-1)*blocksize, inlayer.output.shape[1])))
    #Fill block warmup data
    stackinp = T.set_subtensor(stackinp[1:,:warmup],stackinp[:-1,-warmup:])
    stackinp = stackinp.dimshuffle(1,0,2)
    LPush()
    # Zero initial states per block; the commented-out alternative preallocated
    # a large fixed buffer (1000 rows) and sliced it down to totblks.
    firsthidden = T.alloc(dtypeX(0), totblks, szhidden)
    if warmupHidden is not None:
        firsthidden = T.set_subtensor(firsthidden[warmup // blocksize + 1:], whid[-warmup + blocksize * (warmup // blocksize + 1):-warmup + blocksize * totblks:blocksize])
    firstout = T.alloc(dtypeX(0), totblks, szhidden)
    if warmupOut is not None:
        firstout = T.set_subtensor(firstout[warmup // blocksize + 1:], wout[-warmup + blocksize * (warmup // blocksize + 1):-warmup + blocksize * totblks:blocksize])
    (hiddens, outs), updates = theano.scan(fn=oneStep, outputs_info = [firsthidden, firstout], sequences=stackinp)
    lstml = LPop()[0]
    # Expand data (drop the warmup portion)
    hiddens = hiddens.dimshuffle(1,0,2)
    hiddens = hiddens[:,warmup:].reshape((totblks*blocksize,szhidden))[:inlayer.output.shape[0]]
    outs = outs.dimshuffle(1,0,2)
    outs = outs[:,warmup:].reshape((totblks*blocksize,szhidden))[:inlayer.output.shape[0]]
    if backwards:
        hiddens = hiddens[::-1]
        outs = outs[::-1]
    global extraHid
    extraHid = SymbolLayer(hiddens, (inlayer.output_shape[0], szhidden))
    return SymbolLayer(outs, (inlayer.output_shape[0], szhidden)), lstml
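The packing above is what lets theano.scan run only blocksize + warmup steps while treating the totblks blocks as a parallel batch: each block is prefixed with the tail of its predecessor so the recurrent state can warm up before the kept outputs begin. A minimal NumPy sketch of the same indexing (illustrative only; pack_blocks is not part of the original code):

import numpy as np

def pack_blocks(x, blocksize=10, warmup=10):
    # x: (n, feat) -> (totblks, blocksize + warmup, feat)
    n, feat = x.shape
    totblks = (n + blocksize - 1) // blocksize
    stack = np.zeros((totblks, blocksize + warmup, feat), x.dtype)
    full = (totblks - 1) * blocksize
    # Fill block data (the last block may be partial)
    stack[:-1, warmup:] = x[:full].reshape(totblks - 1, blocksize, feat)
    stack[-1, warmup:warmup + n - full] = x[full:]
    # Fill block warmup data from the tail of the preceding block
    stack[1:, :warmup] = stack[:-1, -warmup:]
    return stack

print(pack_blocks(np.arange(50, dtype='float32').reshape(25, 2)).shape)  # (3, 20, 2)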
Example #2
def LSTMScanArrayToArray(rng, inlayer, szhidden, outf = T.tanh, backwards = False):
    def oneStep(inp, laststate, lastout):
        inl = SymbolLayer(inp, (1,inlayer.output_shape[1]))
        lstmout = LCollect(LSTM(rng, inl, laststate, lastout, szhidden, outf = outf))
        return lstmout.hidden, lstmout.output
    LPush()
    firsthidden = T.alloc(dtypeX(0), 1, szhidden)
    firstout = T.alloc(dtypeX(0), 1, szhidden)
    (hiddens, outs), updates = theano.scan(fn=oneStep, outputs_info = [firsthidden, firstout], sequences=inlayer.output, go_backwards=backwards)
    lstml = LPop()[0]
    return SymbolLayer(outs.reshape((inlayer.output_shape[0], szhidden)), (inlayer.output_shape[0], szhidden)), lstml
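For intuition, theano.scan here plays the role of an ordinary loop that threads (hidden, output) through the sequence. A plain-Python sketch of scan's semantics (not the actual symbolic graph built above):

def scan_like(step, seq, hidden0, out0):
    # step(x, last_hidden, last_out) -> (hidden, out), as in oneStep above
    hiddens, outs = [], []
    hidden, out = hidden0, out0
    for x in seq:
        hidden, out = step(x, hidden, out)
        hiddens.append(hidden)
        outs.append(out)
    return hiddens, outs

print(scan_like(lambda x, h, o: (h + x, h), range(4), 0, 0))
# ([0, 1, 3, 6], [0, 0, 1, 3])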
Example #3
    def __init__(self, input, orig):
        assert isinstance(orig, ShrinkShapeFractal1D)
        self.output_shape = orig.origlayer.output_shape
        self.output = T.alloc(dtypeX(0), orig.origlayer.output.shape[0],
                              orig.origlayer.output.shape[1])
        self.output = T.set_subtensor(self.output[::2], input.output)
        self.output = T.set_subtensor(
            self.output[1::2],
            input.output[:orig.origlayer.output.shape[0] // 2])
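This undoes a ShrinkShapeFractal1D layer by nearest-neighbour upsampling back to the original length: even slots take the shrunken signal and each odd slot repeats its left neighbour. A NumPy rendering of the two set_subtensor calls above (illustrative, not from the original repo):

import numpy as np

def expand_1d(x, orig_len):
    out = np.zeros((orig_len,) + x.shape[1:], x.dtype)
    out[::2] = x                    # even slots take the shrunken signal
    out[1::2] = x[:orig_len // 2]   # odd slots repeat their left neighbour
    return out

print(expand_1d(np.array([1., 2., 3.]), 5))  # [1. 1. 2. 2. 3.]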
Example #4
def LSTMScanArrayToArray(rng, inlayer, szhidden, outf=T.tanh, backwards=False):
    def oneStep(inp, laststate, lastout):
        inl = SymbolLayer(inp, (1, inlayer.output_shape[1]))
        lstmout = LCollect(
            LSTM(rng, inl, laststate, lastout, szhidden, outf=outf))
        return lstmout.hidden, lstmout.output

    LPush()
    firsthidden = T.alloc(dtypeX(0), 1, szhidden)
    firstout = T.alloc(dtypeX(0), 1, szhidden)
    (hiddens,
     outs), updates = theano.scan(fn=oneStep,
                                  outputs_info=[firsthidden, firstout],
                                  sequences=inlayer.output,
                                  go_backwards=backwards)
    lstml = LPop()[0]
    return SymbolLayer(outs.reshape((inlayer.output_shape[0], szhidden)),
                       (inlayer.output_shape[0], szhidden)), lstml
Example #5
    def __init__(self, input, total, current):
        assert len(input.output_shape) == 3
        # Symbolic pool size
        self.poolsize = poolsize = T.cast(T.floor(input.output_shape[2] ** (1.0 / (total - current + 1))), 'int32')
        self.poolresp = poolresp = T.cast((input.output_shape[2] + poolsize - 1) // poolsize, 'int32')
        # Pad up to a whole number of pools
        rectinput = T.alloc(dtypeX(0), input.output_shape[0], input.output_shape[1], poolsize * poolresp)
        rectinput = T.set_subtensor(rectinput[:, :, :input.output_shape[2]], input.output)
        rectinput = rectinput.reshape((input.output_shape[0], input.output_shape[1], poolresp, poolsize))
        # Reshape and average the pool axis out
        self.output = T.mean(rectinput, 3)
        # Special handling for the (possibly partial) last pool
        self.output = T.set_subtensor(self.output[:, :, -1], T.mean(rectinput[:, :, -1, :input.output_shape[2] - (poolresp - 1) * poolsize], 2))
        self.output_shape = (input.output_shape[0], input.output_shape[1], poolresp)
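The pool size is chosen so that total - current + 1 successive applications shrink the axis to roughly unit length: poolsize = floor(len ** (1 / (total - current + 1))). For len = 100, total = 3, current = 1 this gives floor(100 ** (1/3)) = 4 and poolresp = ceil(100 / 4) = 25. A NumPy sketch of the padded mean pooling, including the partial last pool (illustrative, not from the original code):

import numpy as np

def mean_pool_padded(x, poolsize):
    n = x.shape[-1]
    poolresp = (n + poolsize - 1) // poolsize
    pad = np.zeros(x.shape[:-1] + (poolsize * poolresp,), x.dtype)
    pad[..., :n] = x
    pooled = pad.reshape(x.shape[:-1] + (poolresp, poolsize)).mean(-1)
    # The last pool may be partial: average only its real entries
    tail = n - (poolresp - 1) * poolsize
    start = (poolresp - 1) * poolsize
    pooled[..., -1] = pad[..., start:start + tail].mean(-1)
    return pooled

print(mean_pool_padded(np.arange(10, dtype='float32'), 4))  # [1.5 5.5 8.5]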
Example #6
    def __init__(self, input, orig):
        assert isinstance(orig, ShrinkShapeFractal1D)
        self.output_shape = orig.origlayer.output_shape
        self.output = T.alloc(dtypeX(0), orig.origlayer.output.shape[0], orig.origlayer.output.shape[1])
        self.output = T.set_subtensor(self.output[::2], input.output)
        self.output = T.set_subtensor(self.output[1::2], input.output[:orig.origlayer.output.shape[0] // 2])
Example #7
def BlockLSTMUnrollArrayToArray(rng, inlayer, szhidden, blocksize = 10, warmup = 10, outf = T.tanh, noot = False, backwards = False, shareLayer = None, warmupHidden = None, warmupOut = None):
    if backwards:
        inout = inlayer.output[::-1]
    else:
        inout = inlayer.output
    
    if warmupHidden is not None:
        if backwards:
            whid = warmupHidden.output[::-1]
        else:
            whid = warmupHidden.output

    if warmupOut is not None:
        if backwards:
            wout = warmupOut.output[::-1]
        else:
            wout = warmupOut.output

    # Prepare data
    totblks = (inlayer.output.shape[0] + blocksize - 1) // blocksize
    def oneStep(inp, laststate, lastout):
        inl = SymbolLayer(inp, (totblks,inlayer.output_shape[1]))
        lstmout = LSTM(rng, inl, laststate, lastout, szhidden, outf = outf, noot = noot, shareLayer = shareLayer)
        return lstmout.hidden, lstmout.output, lstmout

    stackinp = T.alloc(dtypeX(0), totblks, blocksize+warmup, inlayer.output_shape[1])

    #Fill block data
    stackinp = T.set_subtensor(stackinp[:-1,warmup:],inout[:(totblks-1)*blocksize].reshape((totblks-1, blocksize, inlayer.output.shape[1])))
    stackinp = T.set_subtensor(stackinp[-1,warmup:warmup+inlayer.output.shape[0]-(totblks-1)*blocksize],inout[(totblks-1)*blocksize:].reshape((inlayer.output.shape[0]-(totblks-1)*blocksize, inlayer.output.shape[1])))
    #Fill block warmup data
    stackinp = T.set_subtensor(stackinp[1:,:warmup],stackinp[:-1,-warmup:])
    stackinp = stackinp.dimshuffle(1,0,2)
    # Zero initial states per block; the commented-out alternative preallocated
    # a large fixed buffer (1000 rows) and sliced it down to totblks.
    firsthidden = T.alloc(dtypeX(0), totblks, szhidden)
    if warmupHidden is not None:
        firsthidden = T.set_subtensor(firsthidden[warmup // blocksize + 1:], whid[-warmup + blocksize * (warmup // blocksize + 1):-warmup + blocksize * totblks:blocksize])
    firstout = T.alloc(dtypeX(0), totblks, szhidden)
    if warmupOut is not None:
        firstout = T.set_subtensor(firstout[warmup // blocksize + 1:], wout[-warmup + blocksize * (warmup // blocksize + 1):-warmup + blocksize * totblks:blocksize])

    hiddens = []
    outs = []
    firstshare = None
    for i in range(warmup):
        firsthidden, firstout, shareLayer = oneStep(stackinp[i], firsthidden, firstout)
        if firstshare is None: firstshare = shareLayer
    for i in range(blocksize):
        firsthidden, firstout, shareLayer = oneStep(stackinp[i+warmup], firsthidden, firstout)
        if firstshare is None: firstshare = shareLayer
        hiddens.append(firsthidden)
        outs.append(firstout)

    hiddens = T.stack(*hiddens)
    outs = T.stack(*outs)
    # Expand data (the warmup portion is automatically dropped)
    hiddens = hiddens.dimshuffle(1,0,2)
    hiddens = hiddens.reshape((totblks*blocksize,szhidden))[:inlayer.output.shape[0]]
    outs = outs.dimshuffle(1,0,2)
    outs = outs.reshape((totblks*blocksize,szhidden))[:inlayer.output.shape[0]]
    if backwards:
        hiddens = hiddens[::-1]
        outs = outs[::-1]
    global extraHid
    extraHid = SymbolLayer(hiddens, (inlayer.output_shape[0], szhidden))
    return SymbolLayer(outs, (inlayer.output_shape[0], szhidden)), firstshare
Example #8
    def __init__(self, rng, originput, lastinput, hiddenl, hiddenr, stepl, stepr, outputs = None, sharedlayers = None, nlscan = 'tanh', nlout = 'tanh'):
        if hiddenr is None: hiddenr = hiddenl
        if stepr is None: stepr = stepl
        assert len(originput.output_shape) == 3
        assert lastinput is None or originput.output_shape[0] == lastinput.output_shape[0]
        ilayers = originput.output_shape[1]
        if sharedlayers is not None: wd = sharedlayers.__dict__
        else: wd = {}
        self.Win_hl = Win_hl = wd.get('Win_hl') or self.RNG_GEN(rng, ilayers, hiddenl)
        self.Win_hr = Win_hr = wd.get('Win_hr') or self.RNG_GEN(rng, ilayers, hiddenr)
        self.Wprev = Wprev = wd.get('Wprev') or self.RNG_GEN(rng, hiddenl, hiddenl)
        self.Wnext = Wnext = wd.get('Wnext') or self.RNG_GEN(rng, hiddenr, hiddenr)
        self.bl = bl = wd.get('bl') or self.ZERO_GEN(hiddenl)
        self.br = br = wd.get('br') or self.ZERO_GEN(hiddenr)
        if lastinput is not None:
            ilast = lastinput.output_shape[1]
            self.Wlastl = Wlastl = wd.get('Wlastl') or self.RNG_GEN(rng, ilast, hiddenl)
            self.Wlastr = Wlastr = wd.get('Wlastr') or self.RNG_GEN(rng, ilast, hiddenr)
        if outputs is not None:
            self.Woutput = Woutput = wd.get('Woutput') or self.RNG_GEN(rng, hiddenl + hiddenr, outputs)
            self.boutput = boutput = wd.get('boutput') or self.ZERO_GEN(outputs)
        else:
            # An aggregation-based output could go here instead (not implemented)
            pass
        
        vl0 = self.ZERO_GEN_SYMBOL(originput.output_shape[0], hiddenl)
        vr0 = self.ZERO_GEN_SYMBOL(originput.output_shape[0], hiddenr)

        
        #Real work
        self.odim = odim = (originput.output_shape[1],originput.output_shape[0])
        ldim = None
        self.odl = odl = (hiddenl,originput.output_shape[0])
        self.odr = odr = (hiddenr,originput.output_shape[0])
        self.T_orig_o = T_orig_o = originput.output.dimshuffle(2,0,1)
        if lastinput is not None:
            self.ldim = ldim = (lastinput.output_shape[1],lastinput.output_shape[0])
            self.T_last_o = T_last_o = lastinput.output.dimshuffle(2,0,1)
            statL = T.alloc(dtypeX(0), originput.output_shape[2], originput.output_shape[0], hiddenl)
            for steps in range(stepl):
                if steps!=0:
                    statL = T.set_subtensor(statL[1:], statL[:-1])
                    statL = T.set_subtensor(statL[0], dtypeX(0))
                t = T.tensordot(statL, Wprev, [[2], [1]]) + T.tensordot(T_orig_o, Win_hl, [[2], [1]]) + T.tensordot(T_last_o, Wlastl, [[2], [1]])
                statL = nonlinear(t + bl.dimshuffle('x','x',0), nlscan)
            self.currl = currl = statL
            statR = T.alloc(dtypeX(0), originput.output_shape[2], originput.output_shape[0], hiddenr)
            for steps in range(stepr):
                if steps!=0:
                    statR = T.set_subtensor(statR[:-1], statR[1:])
                    statR = T.set_subtensor(statR[-1], dtypeX(0))
                t = T.tensordot(statR, Wnext, [[2], [1]]) + T.tensordot(T_orig_o, Win_hr, [[2], [1]]) + T.tensordot(T_last_o, Wlastr, [[2], [1]])
                statR = nonlinear(t + br.dimshuffle('x','x',0), nlscan)
            self.currr = currr = statR
        else:
            statL = T.alloc(dtypeX(0), originput.output_shape[2], originput.output_shape[0], hiddenl)
            for steps in range(stepl):
                if steps!=0:
                    statL = T.set_subtensor(statL[1:], statL[:-1])
                    statL = T.set_subtensor(statL[0], dtypeX(0))
                t = T.tensordot(statL, Wprev, [[2], [1]]) + T.tensordot(T_orig_o, Win_hl, [[2], [1]])
                statL = nonlinear(t + bl.dimshuffle('x','x',0), nlscan)
            self.currl = currl = statL
            statR = T.alloc(dtypeX(0), originput.output_shape[2], originput.output_shape[0], hiddenr)
            for steps in range(stepr):
                if steps!=0:
                    statR = T.set_subtensor(statR[:-1], statR[1:])
                    statR = T.set_subtensor(statR[-1], dtypeX(0))
                t = T.tensordot(statR, Wnext, [[2], [1]]) + T.tensordot(T_orig_o, Win_hr, [[2], [1]])
                statR = nonlinear(t + br.dimshuffle('x','x',0), nlscan)
            self.currr = currr = statR

        #Make output
        aggout = T.concatenate([currl, currr], axis=2)
        if outputs is not None:
            #Transpose through another layer
            self.output = nonlinear(T.tensordot(aggout, Woutput, [[2],[1]]) + boutput.dimshuffle('x','x',0), nlout).dimshuffle(1,2,0)
            self.output_shape = [originput.output_shape[0], outputs, originput.output_shape[2]]
        else:
            self.output = aggout.dimshuffle(1,2,0)
            self.output_shape = [originput.output_shape[0], hiddenl+hiddenr, originput.output_shape[2]]
        
        self.params = [Win_hl, Win_hr, Wprev, Wnext, bl, br]
        if lastinput is not None:
            self.params.extend([Wlastl, Wlastr])
        if outputs is not None:
            self.params.extend([Woutput, boutput])

        if sharedlayers is not None:
            for i in sharedlayers.params:
                if i in self.params:
                    self.params.remove(i)
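The class builds left and right context by repeated shift-and-mix passes instead of a scan: each pass shifts the state one step along time and feeds it back through Wprev (or Wnext), so after stepl passes position t has seen up to stepl - 1 earlier frames. A NumPy sketch of the left recurrence (illustrative; the weight orientations are an assumption read off the tensordot axes above):

import numpy as np

def left_context(x, Wprev, Win, b, steps):
    # x: (time, batch, feat); Wprev: (hidden, hidden); Win: (hidden, feat)
    state = np.zeros(x.shape[:2] + (Wprev.shape[0],), x.dtype)
    for s in range(steps):
        if s != 0:
            state[1:] = state[:-1].copy()  # shift context one step forward in time
            state[0] = 0.0
        state = np.tanh(state.dot(Wprev.T) + x.dot(Win.T) + b)
    return state

h = left_context(np.random.randn(7, 2, 3).astype('float32'),
                 np.zeros((4, 4), 'f'), np.random.randn(4, 3).astype('float32'),
                 np.zeros(4, 'f'), steps=3)
print(h.shape)  # (7, 2, 4)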
Example #9
    def ZERO_GEN_SYMBOL(self, *s):
        return T.alloc(dtypeX(0), *s)
Example #10
def trainroutine(ftrain,model,savename,vispath,fdatagen,fvis=None,fcheck=None,fcheckgen=None,TOLTIMES=5,BATCHSTEP=10,LEARNRATEVAR=None,LEARNRATETARGET=10.0,LEARNADJUST=1.01, remotemonitor = False, sameranks = [], longrangecheck = None, longrangeperiod = None,totalsteps = None):
    global TRAINSETTINGS
    TRAINSETTINGS.TOLTIMES = TOLTIMES
    TRAINSETTINGS.BATCHSTEP = BATCHSTEP
    TRAINSETTINGS.LEARNRATEVAR = LEARNRATEVAR
    TRAINSETTINGS.LEARNRATETARGET = LEARNRATETARGET
    TRAINSETTINGS.LEARNADJUST = LEARNADJUST
    TRAINSETTINGS.TOTALSTEPS = totalsteps
    from layerbase import safefile
    import sys, os
    from fractallayer import dtypeX
    if remotemonitor is not False:
        import modelrecord
        if remotemonitor is None: modrec = modelrecord.Record()
        elif isinstance(remotemonitor, tuple): modrec = modelrecord.Record(*remotemonitor)
        else: modrec = modelrecord.Record(remotemonitor)
        modrec.genmeta(model, sameranks)
    else:
        modrec = None

    with safefile(savename) as loadf:
        if loadf:
            model.load(loadf.rb())
    LOSS0 = 1e100
    tol = 0
    l = d = 0
    if vispath is not None:
        if not os.path.exists(vispath):
            os.mkdir(vispath)
        MPDrawInitializer(vispath)
    if isinstance(fdatagen, (str,tuple,list)):
        fdatagen = MPTwoAheadProducer(fdatagen)
    else:
        fdatagen = TwoAheadProducer(fdatagen)
    step = 0
    lrstep = 0
    if longrangecheck is not None and longrangeperiod is None:
        longrangeperiod = BATCHSTEP
    if longrangeperiod is not None:
        TRAINSETTINGS.LONGRANGEPERIOD = longrangeperiod
    while True:
        step += 1
        if TRAINSETTINGS.TOTALSTEPS is not None and step > TRAINSETTINGS.TOTALSTEPS: break
        while True:
            try:
                gen = fdatagen()
                loss,upd = [float(t) for t in ftrain(*gen)]
                break
            except KeyboardInterrupt:raise
            except SystemExit:raise
            except:
                import traceback
                traceback.print_exc()
                sys.stdout.write('*')
                continue
        l += loss
        d += upd
        sys.stdout.write('.')
        sys.stdout.flush()
        if step % TRAINSETTINGS.BATCHSTEP == TRAINSETTINGS.BATCHSTEP - 1:
            print d, l,
            if TRAINSETTINGS.LEARNRATEVAR is not None:
                lval = TRAINSETTINGS.LEARNRATEVAR.get_value()
                if d > TRAINSETTINGS.LEARNRATETARGET * TRAINSETTINGS.BATCHSTEP: lval /= TRAINSETTINGS.LEARNADJUST
                else: lval *= TRAINSETTINGS.LEARNADJUST
                TRAINSETTINGS.LEARNRATEVAR.set_value(dtypeX(lval))
                print lval,
            if modrec is not None:
                modrec.R()
                modrec.Rlt(l)
                modrec.Rd()
            l = d = 0
            if vispath is not None:
                print "DRAW"
                #Draw model
                drawlayers = []
                layer = 0
                for i in model.paramlayers():
                    if len(i.params)<1: continue
                    if len(i.params)>2:
                        if hasattr(i,'reshape'):
                            reshape = i.reshape
                        else:
                            reshape = [None]*len(i.params)
                        for j, rj in zip(i.params, reshape):
                            s = j.get_value()
                            vsh = [v for v in s.shape if v > 1]
                            if len(vsh) < 2: continue
                            layer += 1
                            drawlayers.append((layer, s, rj))
                    else:
                        layer += 1
                        drawlayers.append((layer, i.params[0].get_value(), i.reshape if hasattr(i, 'reshape') and i.reshape is not None else None))
                resplayers = fvis(*gen) if fvis is not None else []
                MPDrawWriter(drawlayers,resplayers)
            else:
                print
            #Check validset
            if fcheckgen is not None and fcheck is not None:
                LOSS1 = 0.0
                for j in fcheckgen():
                    sys.stdout.write('.')
                    sys.stdout.flush()
                    LOSS1 += fcheck(*j)
                print LOSS1
                if modrec is not None:
                    modrec.Rlv(LOSS1)
                if LOSS1>LOSS0:
                    print "Converge on validset"
                    tol+=1
                    if tol>TRAINSETTINGS.TOLTIMES:
                        sys.exit(0)
                else:
                    tol=0
                print "NEW LOSS",LOSS1
                LOSS0 = LOSS1
            if longrangecheck is not None:
                lrstep += 1
                if lrstep%TRAINSETTINGS.LONGRANGEPERIOD == TRAINSETTINGS.LONGRANGEPERIOD-1:
                    try:
                        result = longrangecheck()
                        modrec.Rfloat(result)
                    except KeyboardInterrupt: raise
                    except SystemExit: raise
                    except:
                        import traceback
                        traceback.print_exc()
            #Commit
            if modrec is not None:
                modrec.C()
            #Save model
            with safefile(savename) as savef:
                model.save(savef.wb())
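The learning-rate branch above is a simple proportional controller: over each BATCHSTEP window it compares the accumulated update magnitude d against LEARNRATETARGET * BATCHSTEP and nudges the rate down or up by a factor of LEARNADJUST. A standalone sketch of just that rule (illustrative, with made-up magnitudes):

def adjust_lr(lval, d, batchstep=10, target=10.0, factor=1.01):
    if d > target * batchstep:  # updates too large: damp the rate
        return lval / factor
    return lval * factor        # otherwise grow it slowly

lr = 0.1
for d in (150.0, 80.0, 120.0):  # accumulated update magnitude per window
    lr = adjust_lr(lr, d)
print(round(lr, 6))  # 0.09901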
Example #11
def trainroutine(ftrain,
                 model,
                 savename,
                 vispath,
                 fdatagen,
                 fvis=None,
                 fcheck=None,
                 fcheckgen=None,
                 TOLTIMES=5,
                 BATCHSTEP=10,
                 LEARNRATEVAR=None,
                 LEARNRATETARGET=10.0,
                 LEARNADJUST=1.01,
                 remotemonitor=False,
                 sameranks=[],
                 longrangecheck=None,
                 longrangeperiod=None,
                 totalsteps=None):
    global TRAINSETTINGS
    TRAINSETTINGS.TOLTIMES = TOLTIMES
    TRAINSETTINGS.BATCHSTEP = BATCHSTEP
    TRAINSETTINGS.LEARNRATEVAR = LEARNRATEVAR
    TRAINSETTINGS.LEARNRATETARGET = LEARNRATETARGET
    TRAINSETTINGS.LEARNADJUST = LEARNADJUST
    TRAINSETTINGS.TOTALSTEPS = totalsteps
    from layerbase import safefile
    import sys, os
    from fractallayer import dtypeX
    if remotemonitor is not False:
        import modelrecord
        if remotemonitor is None: modrec = modelrecord.Record()
        elif isinstance(remotemonitor, tuple):
            modrec = modelrecord.Record(*remotemonitor)
        else:
            modrec = modelrecord.Record(remotemonitor)
        modrec.genmeta(model, sameranks)
    else:
        modrec = None

    with safefile(savename) as loadf:
        if loadf:
            model.load(loadf.rb())
    LOSS0 = 1e100
    tol = 0
    l = d = 0
    if vispath is not None:
        if not os.path.exists(vispath):
            os.mkdir(vispath)
        MPDrawInitializer(vispath)
    if isinstance(fdatagen, (str, tuple, list)):
        fdatagen = MPTwoAheadProducer(fdatagen)
    else:
        fdatagen = TwoAheadProducer(fdatagen)
    step = 0
    lrstep = 0
    if longrangecheck is not None and longrangeperiod is None:
        longrangeperiod = BATCHSTEP
    if longrangeperiod is not None:
        TRAINSETTINGS.LONGRANGEPERIOD = longrangeperiod
    while True:
        step += 1
        if TRAINSETTINGS.TOTALSTEPS is not None and step > TRAINSETTINGS.TOTALSTEPS:
            break
        while True:
            try:
                gen = fdatagen()
                loss, upd = [float(t) for t in ftrain(*gen)]
                break
            except KeyboardInterrupt:
                raise
            except SystemExit:
                raise
            except:
                import traceback
                traceback.print_exc()
                sys.stdout.write('*')
                continue
        l += loss
        d += upd
        sys.stdout.write('.')
        sys.stdout.flush()
        if step % TRAINSETTINGS.BATCHSTEP == TRAINSETTINGS.BATCHSTEP - 1:
            print d, l,
            if TRAINSETTINGS.LEARNRATEVAR is not None:
                lval = TRAINSETTINGS.LEARNRATEVAR.get_value()
                if d > TRAINSETTINGS.LEARNRATETARGET * TRAINSETTINGS.BATCHSTEP:
                    lval /= TRAINSETTINGS.LEARNADJUST
                else:
                    lval *= TRAINSETTINGS.LEARNADJUST
                TRAINSETTINGS.LEARNRATEVAR.set_value(dtypeX(lval))
                print lval,
            if modrec is not None:
                modrec.R()
                modrec.Rlt(l)
                modrec.Rd()
            l = d = 0
            if vispath is not None:
                print "DRAW"
                #Draw model
                drawlayers = []
                layer = 0
                for i in model.paramlayers():
                    if len(i.params) < 1: continue
                    if len(i.params) > 2:
                        if hasattr(i, 'reshape'):
                            reshape = i.reshape
                        else:
                            reshape = [None] * len(i.params)
                        for j, rj in zip(i.params, reshape):
                            s = j.get_value()
                            vsh = [v for v in s.shape if v > 1]
                            if len(vsh) < 2: continue
                            layer += 1
                            drawlayers.append((layer, s, rj))
                    else:
                        layer += 1
                        drawlayers.append((layer, i.params[0].get_value(),
                                           i.reshape if hasattr(i, 'reshape')
                                           and i.reshape is not None else None))
                resplayers = fvis(*gen) if fvis is not None else []
                MPDrawWriter(drawlayers, resplayers)
            else:
                print
            #Check validset
            if fcheckgen is not None and fcheck is not None:
                LOSS1 = 0.0
                for j in fcheckgen():
                    sys.stdout.write('.')
                    sys.stdout.flush()
                    LOSS1 += fcheck(*j)
                print LOSS1
                if modrec is not None:
                    modrec.Rlv(LOSS1)
                if LOSS1 > LOSS0:
                    print "Converge on validset"
                    tol += 1
                    if tol > TRAINSETTINGS.TOLTIMES:
                        sys.exit(0)
                else:
                    tol = 0
                print "NEW LOSS", LOSS1
                LOSS0 = LOSS1
            if longrangecheck is not None:
                lrstep += 1
                if lrstep % TRAINSETTINGS.LONGRANGEPERIOD == TRAINSETTINGS.LONGRANGEPERIOD - 1:
                    try:
                        result = longrangecheck()
                        modrec.Rfloat(result)
                    except KeyboardInterrupt:
                        raise
                    except SystemExit:
                        raise
                    except:
                        import traceback
                        traceback.print_exc()
            #Commit
            if modrec is not None:
                modrec.C()
            #Save model
            with safefile(savename) as savef:
                model.save(savef.wb())
Example #12
def BlockGLSTMScanArrayToArray(rng,
                               inlayer,
                               szgate,
                               szhidden,
                               blocksize=10,
                               warmup=10,
                               outf=T.tanh,
                               noot=False,
                               backwards=False,
                               shareLayer=None,
                               warmupHidden=None,
                               warmupOut=None):
    if backwards:
        inout = inlayer.output[::-1]
    else:
        inout = inlayer.output

    if warmupHidden is not None:
        if backwards:
            whid = warmupHidden.output[::-1]
        else:
            whid = warmupHidden.output

    if warmupOut is not None:
        if backwards:
            wout = warmupOut.output[::-1]
        else:
            wout = warmupOut.output

    # Prepare data
    totblks = (inlayer.output.shape[0] + blocksize - 1) // blocksize

    def oneStep(inp, laststate, lastout):
        inl = SymbolLayer(inp, (totblks, inlayer.output_shape[1]))
        lstmout = LCollect(
            GLSTM(rng,
                  inl,
                  laststate,
                  lastout,
                  szgate,
                  szhidden,
                  outf=outf,
                  noot=noot,
                  shareLayer=shareLayer))
        return lstmout.hidden, lstmout.output

    stackinp = T.alloc(dtypeX(0), totblks, blocksize + warmup,
                       inlayer.output_shape[1])

    #Fill block data
    stackinp = T.set_subtensor(
        stackinp[:-1, warmup:], inout[:(totblks - 1) * blocksize].reshape(
            (totblks - 1, blocksize, inlayer.output.shape[1])))
    stackinp = T.set_subtensor(
        stackinp[-1, warmup:warmup + inlayer.output.shape[0] -
                 (totblks - 1) * blocksize],
        inout[(totblks - 1) * blocksize:].reshape(
            (inlayer.output.shape[0] - (totblks - 1) * blocksize,
             inlayer.output.shape[1])))
    #Fill block warmup data
    stackinp = T.set_subtensor(stackinp[1:, :warmup], stackinp[:-1, -warmup:])
    stackinp = stackinp.dimshuffle(1, 0, 2)
    LPush()
    # Zero initial states per block; the commented-out alternative preallocated
    # a large fixed buffer (1000 rows) and sliced it down to totblks.
    firsthidden = T.alloc(dtypeX(0), totblks, szhidden)
    if warmupHidden is not None:
        firsthidden = T.set_subtensor(
            firsthidden[warmup // blocksize + 1:],
            whid[-warmup + blocksize * (warmup // blocksize + 1):-warmup +
                 blocksize * totblks:blocksize])
    firstout = T.alloc(dtypeX(0), totblks, szhidden)
    if warmupOut is not None:
        firstout = T.set_subtensor(
            firstout[warmup // blocksize + 1:],
            wout[-warmup + blocksize * (warmup // blocksize + 1):-warmup +
                 blocksize * totblks:blocksize])
    (hiddens,
     outs), updates = theano.scan(fn=oneStep,
                                  outputs_info=[firsthidden, firstout],
                                  sequences=stackinp)
    lstml = LPop()[0]
    # Expand data (drop the warmup portion)
    hiddens = hiddens.dimshuffle(1, 0, 2)
    hiddens = hiddens[:, warmup:].reshape(
        (totblks * blocksize, szhidden))[:inlayer.output.shape[0]]
    outs = outs.dimshuffle(1, 0, 2)
    outs = outs[:, warmup:].reshape(
        (totblks * blocksize, szhidden))[:inlayer.output.shape[0]]
    if backwards:
        hiddens = hiddens[::-1]
        outs = outs[::-1]
    global extraHid
    extraHid = SymbolLayer(hiddens, (inlayer.output_shape[0], szhidden))
    return SymbolLayer(outs, (inlayer.output_shape[0], szhidden)), lstml
Example #13
def BlockLSTMUnrollArrayToArray(rng,
                                inlayer,
                                szhidden,
                                blocksize=10,
                                warmup=10,
                                outf=T.tanh,
                                noot=False,
                                backwards=False,
                                shareLayer=None,
                                warmupHidden=None,
                                warmupOut=None):
    if backwards:
        inout = inlayer.output[::-1]
    else:
        inout = inlayer.output

    if warmupHidden is not None:
        if backwards:
            whid = warmupHidden.output[::-1]
        else:
            whid = warmupHidden.output

    if warmupOut is not None:
        if backwards:
            wout = warmupOut.output[::-1]
        else:
            wout = warmupOut.output

    # Prepare data
    totblks = (inlayer.output.shape[0] + blocksize - 1) // blocksize

    def oneStep(inp, laststate, lastout):
        inl = SymbolLayer(inp, (totblks, inlayer.output_shape[1]))
        lstmout = LSTM(rng,
                       inl,
                       laststate,
                       lastout,
                       szhidden,
                       outf=outf,
                       noot=noot,
                       shareLayer=shareLayer)
        return lstmout.hidden, lstmout.output, lstmout

    stackinp = T.alloc(dtypeX(0), totblks, blocksize + warmup,
                       inlayer.output_shape[1])

    #Fill block data
    stackinp = T.set_subtensor(
        stackinp[:-1, warmup:], inout[:(totblks - 1) * blocksize].reshape(
            (totblks - 1, blocksize, inlayer.output.shape[1])))
    stackinp = T.set_subtensor(
        stackinp[-1, warmup:warmup + inlayer.output.shape[0] -
                 (totblks - 1) * blocksize],
        inout[(totblks - 1) * blocksize:].reshape(
            (inlayer.output.shape[0] - (totblks - 1) * blocksize,
             inlayer.output.shape[1])))
    #Fill block warmup data
    stackinp = T.set_subtensor(stackinp[1:, :warmup], stackinp[:-1, -warmup:])
    stackinp = stackinp.dimshuffle(1, 0, 2)
    # Zero initial states per block; the commented-out alternative preallocated
    # a large fixed buffer (1000 rows) and sliced it down to totblks.
    firsthidden = T.alloc(dtypeX(0), totblks, szhidden)
    if warmupHidden is not None:
        firsthidden = T.set_subtensor(
            firsthidden[warmup // blocksize + 1:],
            whid[-warmup + blocksize * (warmup // blocksize + 1):-warmup +
                 blocksize * totblks:blocksize])
    firstout = T.alloc(dtypeX(0), totblks, szhidden)
    if warmupOut is not None:
        firstout = T.set_subtensor(
            firstout[warmup // blocksize + 1:],
            wout[-warmup + blocksize * (warmup // blocksize + 1):-warmup +
                 blocksize * totblks:blocksize])

    hiddens = []
    outs = []
    firstshare = None
    for i in range(warmup):
        firsthidden, firstout, shareLayer = oneStep(stackinp[i], firsthidden,
                                                    firstout)
        if firstshare is None: firstshare = shareLayer
    for i in range(blocksize):
        firsthidden, firstout, shareLayer = oneStep(stackinp[i + warmup],
                                                    firsthidden, firstout)
        if firstshare is None: firstshare = shareLayer
        hiddens.append(firsthidden)
        outs.append(firstout)

    hiddens = T.stack(*hiddens)
    outs = T.stack(*outs)
    # Expand data (the warmup portion is automatically dropped)
    hiddens = hiddens.dimshuffle(1, 0, 2)
    hiddens = hiddens.reshape(
        (totblks * blocksize, szhidden))[:inlayer.output.shape[0]]
    outs = outs.dimshuffle(1, 0, 2)
    outs = outs.reshape(
        (totblks * blocksize, szhidden))[:inlayer.output.shape[0]]
    if backwards:
        hiddens = hiddens[::-1]
        outs = outs[::-1]
    global extraHid
    extraHid = SymbolLayer(hiddens, (inlayer.output_shape[0], szhidden))
    return SymbolLayer(outs, (inlayer.output_shape[0], szhidden)), firstshare