def __init__(self, rng, rstream, x, wantOut, params, useRglrz, bnPhase, globalParams=None):
    ''' Constructing the mlp model.

        Arguments:
            rng, rstream - random streams
            x            - batch of input samples
            wantOut      - corresponding labels
            params       - all model parameters
            useRglrz     - theano variable switching regularization on/off
            bnPhase      - theano variable determining how BN params are computed
            globalParams - T2 params when one-per-network
    '''
    # define shared variables shared across layers
    if globalParams is None:
        globalParams = {}
        for rglrz in params.rglrzPerNetwork:
            tempParam = np.asarray(params.rglrzInitial[rglrz][0], dtype=theano.config.floatX)
            globalParams[rglrz] = theano.shared(value=tempParam, name='%s_%d' % (rglrz, 0), borrow=True)
        for rglrz in params.rglrzPerNetwork1:
            tempParam = np.asarray(params.rglrzInitial[rglrz][0], dtype=theano.config.floatX)
            globalParams[rglrz + str(0)] = theano.shared(value=tempParam, name='%s_%d' % (rglrz, 0), borrow=True)
            tempParam = np.asarray(params.rglrzInitial[rglrz][1], dtype=theano.config.floatX)
            globalParams[rglrz] = theano.shared(value=tempParam, name='%s_%d' % (rglrz, 1), borrow=True)

    # initialize counters, lists and dictionaries
    h = []
    penalty = 0.
    trackT2Params = {}
    for param in params.rglrz:
        trackT2Params[param] = []
    paramsT1, paramsT2, paramsBN, updateBN = [], [], [], []

    # CONSTRUCT NETWORK
    for i in range(0, params.nLayers):
        h.append(mlp_layer(rng=rng, rstream=rstream, index=i, x=x,
                           params=params, globalParams=globalParams,
                           useRglrz=useRglrz, bnPhase=bnPhase))

        # collect penalty terms
        if 'L2' in params.rglrz:
            tempW = h[i].rglrzParam['L2'] * T.sqr(h[i].W)
            penalty += T.sum(tempW)
        if 'L1' in params.rglrz:
            tempW = h[i].rglrzParam['L1'] * T.sum(abs(h[i].W), axis=0)  # was self.rglrzParam: bug
            penalty += T.sum(tempW)
        if 'LmaxCutoff' in params.rglrz:
            # soft Lmax: columns whose L2 norm exceeds the cutoff c are
            # penalized quadratically with slope s, e.g. with c=3, s=0.5 a
            # column of norm 5 contributes 0.5*(5-3)^2 = 2. to the penalty
            c = h[i].rglrzParam['LmaxCutoff']  # cutoff (was self.rglrzParam: bug)
            s = h[i].rglrzParam['LmaxSlope']   # slope
            tempW = T.sqrt(T.sum(T.sqr(h[i].W), axis=0))
            penalty += T.sum(s * T.sqr(T.maximum(tempW - c, 0)))

        # collect T1 params
        paramsT1 += h[i].paramsT1
        # collect T2 params
        paramsT2 += h[i].paramsT2
        for param in params.rglrz:
            if (param == 'inputNoise' and i == 0) or (param != 'inputNoise'):
                trackT2Params[param] += [h[i].rglrzParam[param]]
        # collect BN params & updates
        if params.batchNorm and params.activation[i] != 'softmax':
            paramsBN += h[i].paramsBN
            updateBN += h[i].updateBN

        # output of this layer feeds the next one
        x = h[-1].output

    # pack variables for output
    for rglrz in globalParams.keys():
        paramsT2 += [globalParams[rglrz]]
    self.paramsT1 = paramsT1
    self.paramsT2 = paramsT2
    self.paramsBN = paramsBN
    self.updateBN = updateBN

    # tracking of stats
    if params.trackStats:
        self.netStats = stat_monitor(layers=h, params=params)
    else:
        self.netStats = T.constant(0.)
    self.trackT2Params = trackT2Params

    # output and predicted labels
    self.h = h
    self.y = h[-1].output
    self.guessLabel = T.argmax(self.y, axis=1)
    self.penalty = penalty if penalty != 0. else T.constant(0.)
    # cost functions
    def stable(x, stabilize=True):
        # replace NaN/Inf entries with a large finite constant so the cost
        # (and its gradient) stays well-defined
        if stabilize:
            x = T.where(T.isnan(x), 1000., x)
            x = T.where(T.isinf(x), 1000., x)
        return x

    if params.cost == 'categorical_crossentropy':
        def costFun1(y, label):
            return stable(-T.log(y[T.arange(label.shape[0]), label]),
                          stabilize=True)
    else:
        raise NotImplementedError
    # NOTE: only categorical cross-entropy is actually implemented for T2;
    # 'sigmoidal' and 'hingeLoss' currently fall back to the same cost
    if params.cost_T2 in ['categorical_crossentropy', 'sigmoidal', 'hingeLoss']:
        def costFun2(y, label):
            return stable(-T.log(y[T.arange(label.shape[0]), label]),
                          stabilize=True)
    else:
        raise NotImplementedError

    def costFunT1(*args, **kwargs):
        return T.mean(costFun1(*args, **kwargs))
    def costFunT2(*args, **kwargs):
        return T.mean(costFun2(*args, **kwargs))

    # cost function and classification error
    self.trainCost = costFunT1(self.y, wantOut)
    self.classError = T.mean(T.cast(T.neq(self.guessLabel, wantOut), 'float32'))
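
# ---------------------------------------------------------------------------
# Minimal standalone sketch (illustration only, not part of the model) of the
# stabilized categorical cross-entropy defined in the constructor above:
# -log p of the correct class, with NaN/Inf clamped to a large finite value.
# The function name `demo_stable_crossentropy` is hypothetical.
def demo_stable_crossentropy():
    import numpy as np
    import theano
    import theano.tensor as T

    y = T.matrix('y')            # predicted class probabilities, one row per sample
    label = T.ivector('label')   # integer class labels
    # per-sample negative log-likelihood of the correct class
    nll = -T.log(y[T.arange(label.shape[0]), label])
    # clamp NaN/Inf (e.g. from log(0)) to 1000., as stable() does
    nll = T.where(T.isnan(nll), 1000., nll)
    nll = T.where(T.isinf(nll), 1000., nll)
    f = theano.function([y, label], T.mean(nll))

    probs = np.array([[0.7, 0.3], [0.0, 1.0]], dtype=theano.config.floatX)
    labels = np.array([0, 0], dtype=np.int32)
    # the second sample assigns probability 0 to its label, so
    # -log(0) = inf gets clamped to 1000.
    print f(probs, labels)       # roughly (0.357 + 1000.) / 2 = 500.18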
def __init__(self, rng, rstream, x, wantOut, params, useRglrz, bnPhase, globalParams=None):
    ''' Constructing the convolutional model.

        Arguments:
            rng, rstream :: random streams
            x            :: batch of input samples
            wantOut      :: corresponding labels
            params       :: all model parameters
            useRglrz     :: theano variable switching regularization on/off
            bnPhase      :: theano variable determining how BN params are computed
            globalParams :: T2 params when one-per-network
    '''
    # define shared variables shared across layers
    if globalParams is None:
        globalParams = {}
        for rglrz in params.rglrzPerNetwork:
            tempParam = np.asarray(params.rglrzInitial[rglrz][0], dtype=theano.config.floatX)
            globalParams[rglrz] = theano.shared(value=tempParam, name='%s_%d' % (rglrz, 0), borrow=True)
        for rglrz in params.rglrzPerNetwork1:
            tempParam = np.asarray(params.rglrzInitial[rglrz][0], dtype=theano.config.floatX)
            globalParams[rglrz + str(0)] = theano.shared(value=tempParam, name='%s_%d' % (rglrz, 0), borrow=True)
            tempParam = np.asarray(params.rglrzInitial[rglrz][1], dtype=theano.config.floatX)
            globalParams[rglrz] = theano.shared(value=tempParam, name='%s_%d' % (rglrz, 1), borrow=True)

    # initialize counters, lists and dictionaries
    i = 0
    h = []
    penalty = 0.
    # NOTE: `zero` was undefined in this scope; assumed to be a constant
    # placeholder for regularizers that are unused at a given layer
    zero = T.constant(0.)
    trackT2Params = {}
    for param in params.rglrz:
        trackT2Params[param] = []
    paramsT1, paramsT2, paramsBN, updateBN = [], [], [], []
    netStats = {}
    for key in params.activTrack:
        netStats[key] = []

    ''' Constructing layers. '''
    for layer in params.convLayers:

        # construct layer
        print 'layer ', str(i), ':', layer.type, layer.filter, layer.maps, ' filters'
        if layer.type == 'conv':
            h.append(conv_layer(rng=rng, rstream=rstream, index=i, x=x,
                                params=params, globalParams=globalParams,
                                useRglrz=useRglrz, bnPhase=bnPhase,
                                filterShape=layer.filter, inFilters=layer.maps[0],
                                outFilters=layer.maps[1], stride=layer.stride))
        elif layer.type == 'pool':
            h.append(pool_layer(rstream=rstream, index=i, x=x,
                                params=params, useRglrz=useRglrz, bnPhase=bnPhase,
                                poolShape=layer.filter, inFilters=layer.maps[0],
                                outFilters=layer.maps[1], stride=layer.stride))
        elif layer.type in ['average', 'average+softmax']:
            h.append(average_layer(rstream=rstream, index=i, x=x,
                                   params=params, useRglrz=useRglrz, bnPhase=bnPhase,
                                   poolShape=layer.filter, inFilters=layer.maps[0],
                                   outFilters=layer.maps[1], stride=layer.stride))
        # elif layer.type == 'softmax':
        #     h.append(mlp_layer(rng=rng, rstream=rstream, index=i, splitPoint=splitPoint, x=x,
        #                        params=params, globalParams=globalParams, graph=graph))

        # collect penalty term
        if layer.type in ['conv', 'softmax'] and ('L2' in params.rglrz):
            if 'L2' in params.rglrzPerMap:
                # per-map L2: broadcast the per-map coefficient vector over
                # the (outFilters, inFilters, rows, cols) weight tensor
                tempW = h[-1].rglrzParam['L2'].dimshuffle(0, 'x', 'x', 'x') * T.sqr(h[-1].W)
            else:
                tempW = h[-1].rglrzParam['L2'] * T.sqr(h[-1].W)
            penalty += T.sum(tempW)

        # collect T1 params
        if layer.type in ['conv', 'softmax']:
            paramsT1 += h[i].paramsT1
        elif params.batchNorm and params.convLayers[i].bn:
            paramsT1 += h[i].paramsT1
        # collect T2 params
        if params.useT2:
            paramsT2 += h[i].paramsT2

        # collect T2 params for tracking
        for param in params.rglrz:
            if param == 'xNoise':
                if i == 0 and layer.noise:
                    trackT2Params[param] += [h[-1].rglrzParam[param]]
                else:
                    trackT2Params[param] += [zero]
            if param == 'addNoise':
                if layer.noise:
                    trackT2Params[param] += [h[-1].rglrzParam[param]]
                else:
                    trackT2Params[param] += [zero]
            if param in ['L1', 'L2', 'Lmax']:
                if layer.type in ['conv', 'softmax']:
                    trackT2Params[param] += [h[-1].rglrzParam[param]]
                else:
                    trackT2Params[param] += [zero]

        # collect BN params & updates
        if params.batchNorm and params.convLayers[i].bn:
            paramsBN += h[-1].paramsBN
            updateBN += h[-1].updateBN

        # output of this layer feeds the next one
        x = h[-1].output
        i += 1

    # pack variables for output
    for rglrz in globalParams.keys():
        if rglrz in params.rglrzTrain:
            paramsT2 += [globalParams[rglrz]]
    self.paramsT1 = paramsT1
    self.paramsT2 = paramsT2
    self.paramsBN = paramsBN
    self.updateBN = updateBN

    # tracking of stats
    if params.trackStats:
        self.netStats = stat_monitor(layers=h, params=params)
    else:
        self.netStats = T.constant(0.)
    self.trackT2Params = trackT2Params

    for param in params.rglrz:
        print len(trackT2Params[param])
    print '# t1 params: ', len(paramsT1), '# t2 params: ', len(paramsT2)

    # output and predicted labels
    self.h = h
    self.y = h[-1].output
    self.guessLabel = T.argmax(self.y, axis=1)
    self.penalty = penalty if penalty != 0. else T.constant(0.)

    # cost functions
    def stable(x, stabilize=True):
        # replace NaN/Inf entries with a large finite constant so the cost
        # (and its gradient) stays well-defined
        if stabilize:
            x = T.where(T.isnan(x), 1000., x)
            x = T.where(T.isinf(x), 1000., x)
        return x

    if params.cost == 'categorical_crossentropy':
        def costFun1(y, label):
            return stable(-T.log(y[T.arange(label.shape[0]), label]),
                          stabilize=True)
    else:
        raise NotImplementedError
    # NOTE: only categorical cross-entropy is actually implemented for T2;
    # 'sigmoidal' and 'hingeLoss' currently fall back to the same cost
    if params.cost_T2 in ['categorical_crossentropy', 'sigmoidal', 'hingeLoss']:
        def costFun2(y, label):
            return stable(-T.log(y[T.arange(label.shape[0]), label]),
                          stabilize=True)
    else:
        raise NotImplementedError

    def costFunT1(*args, **kwargs):
        return T.mean(costFun1(*args, **kwargs))
    def costFunT2(*args, **kwargs):
        return T.mean(costFun2(*args, **kwargs))

    # cost function and classification error
    # self.trainCost = useRglrz*costFunT1(self.y, wantOut) + (1-useRglrz)*costFunT2(self.y, wantOut)
    self.trainCost = costFunT1(self.y, wantOut)
    self.clasRate = T.mean(T.cast(T.neq(self.guessLabel, wantOut), 'float32'))
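
# ---------------------------------------------------------------------------
# Minimal sketch (illustration only, not part of the model) of the per-map L2
# penalty used in the convnet above: a vector with one coefficient per output
# map is broadcast over the 4D weight tensor (outFilters, inFilters, rows,
# cols) via dimshuffle. The name `demo_per_map_l2` is hypothetical.
def demo_per_map_l2():
    import numpy as np
    import theano
    import theano.tensor as T

    W = T.tensor4('W')       # conv weights: (outFilters, inFilters, rows, cols)
    l2 = T.vector('l2')      # one L2 coefficient per output map
    # dimshuffle(0, 'x', 'x', 'x') reshapes l2 to (outFilters, 1, 1, 1) so it
    # broadcasts against W; the penalty is sum_j l2_j * ||W_j||^2
    penalty = T.sum(l2.dimshuffle(0, 'x', 'x', 'x') * T.sqr(W))
    f = theano.function([W, l2], penalty)

    w = np.ones((2, 3, 5, 5), dtype=theano.config.floatX)  # 2 maps, 75 weights each
    coeff = np.array([0.1, 0.2], dtype=theano.config.floatX)
    print f(w, coeff)        # 0.1*75 + 0.2*75 = 22.5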
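
# ---------------------------------------------------------------------------
# Minimal sketch (illustration only) of the `globalParams` pattern shared by
# both constructors above: each network-wide T2 hyperparameter is stored in a
# theano shared variable so it can later be updated by gradient steps. The
# dict layout mirrors the constructors; `demo_global_params` and the inline
# stand-in for params.rglrzPerNetwork are hypothetical.
def demo_global_params():
    import numpy as np
    import theano

    rglrzInitial = {'L2': [0.01]}   # initial value per regularizer
    globalParams = {}
    for rglrz in ['L2']:            # stands in for params.rglrzPerNetwork
        tempParam = np.asarray(rglrzInitial[rglrz][0], dtype=theano.config.floatX)
        globalParams[rglrz] = theano.shared(value=tempParam,
                                            name='%s_%d' % (rglrz, 0), borrow=True)
    print globalParams['L2'].get_value()   # 0.01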