def mcr(x, y):
    """ Build an expression for the mean correlation with respect to all
    features. The first dimension of {x} and {y} is treated as the sample
    index; the rest span the feature space. The mean correlation over all
    features usually measures prediction performance for real continuous
    target variables. """
    # make sure x is a matrix, and also *symbolic*
    if x.ndim < 2:
        x = T.reshape(x, [x.size, 1])
    else:
        x = T.reshape(x, [x.shape[0], -1])

    # make sure y is a matrix, and also *symbolic*
    if y.ndim < 2:
        y = T.reshape(y, [y.size, 1])
    else:
        y = T.reshape(y, [y.shape[0], -1])

    # z-scores of every feature for both x and y
    zx = (x - T.mean(x, 0, keepdims=True)) / T.std(x, 0, keepdims=True)
    zy = (y - T.mean(y, 0, keepdims=True)) / T.std(y, 0, keepdims=True)

    # the row means of the z-score products are the (P) per-feature
    # correlations; the column mean of those P correlations gives the
    # mean correlation.
    rr = (zx * zy).mean()
    return rr
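# Usage sketch for mcr() above (an assumed calling convention, not part of
# the original source): build the expression on symbolic inputs, compile it
# once, then evaluate it on concrete arrays.
import numpy as np
import theano
import theano.tensor as T

x_sym, y_sym = T.dmatrix('x'), T.dmatrix('y')
corr_fn = theano.function([x_sym, y_sym], mcr(x_sym, y_sym))
x_val = np.random.randn(100, 5)
print(corr_fn(x_val, x_val))    # perfectly correlated features -> 1.0
print(corr_fn(x_val, -x_val))   # anti-correlated features -> -1.0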
def build_model(conv0, doses, timeV, expTable):
    """ Builds then returns the PyMC model. """
    growth_model = pm.Model()

    with growth_model:
        conversions = conversionPriors(conv0)
        d, apopfrac = deathPriors(len(doses))

        # Specify vectors of prior distributions
        # Growth rate
        div = pm.Uniform("div", lower=0.0, upper=0.035, shape=len(doses))

        # Rate of entering apoptosis or skipping straight to death
        deathRate = pm.Lognormal("deathRate", np.log(0.001), 0.5, shape=len(doses))

        lnum, eap, deadapop, deadnec = theanoCore(timeV, div, deathRate, apopfrac, d)

        # Convert model calculations to experimental measurement units
        confl_exp, apop_exp, dna_exp = convSignal(lnum, eap, deadapop, deadnec, conversions)

        # Observed error values for confl
        confl_obs = T.reshape(confl_exp, (-1,)) - expTable["confl"]
        pm.Normal("dataFit", sd=T.std(confl_obs), observed=confl_obs)

        # Observed error values for apop
        apop_obs = T.reshape(apop_exp, (-1,)) - expTable["apop"]
        pm.Normal("dataFita", sd=T.std(apop_obs), observed=apop_obs)

        # Observed error values for dna
        dna_obs = T.reshape(dna_exp, (-1,)) - expTable["dna"]
        pm.Normal("dataFitd", sd=T.std(dna_obs), observed=dna_obs)

    return growth_model
def _output(self, input, *args, **kwargs):
    input = self.input_layer.output()
    out = T.switch(T.gt(input, 0), 1, 0)
    if out.ndim > 2:
        std = T.std(out, axis=(0, 2, 3))
    else:
        std = T.std(out, axis=0)
    return T.concatenate([T.mean(std).reshape((1,)), T.mean(out).reshape((1,))])
def cross_correlation(x, y):
    # returns 1 - (mean Pearson correlation), i.e. a dissimilarity:
    # 0 for perfectly correlated inputs, 2 for anti-correlated ones
    x_mean = mean(x)
    y_mean = mean(y)
    x_stdev = std(x)
    y_stdev = std(y)
    y_dev = y - y_mean
    x_dev = x - x_mean
    return 1 - (mean(x_dev * y_dev / (x_stdev * y_stdev)))
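# cross_correlation() above is a loss, not a similarity. The bare mean/std
# presumably come from a star import of theano.tensor or numpy (an
# assumption). A small numpy check of the same formula:
import numpy as np

x = np.random.randn(1000)
y = 2.0 * x + 1.0                                               # perfectly correlated
r = np.mean((x - x.mean()) * (y - y.mean()) / (x.std() * y.std()))
print(1 - r)                                                    # ~0.0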
def build(self):
    """The PyMC model that incorporates Bayesian statistics to store the
    likelihood of the model at a given point."""
    M = pm.Model()

    with M:
        kfwd, endo, activeEndo, kRec, kDeg, sortF = commonTraf(trafficking=self.traf)
        rxnrates = pm.Lognormal("rxn", sigma=0.5, shape=6)  # 6 reverse rxn rates for IL2/IL15
        nullRates = T.ones(4, dtype=np.float64)  # k27rev, k31rev, k33rev, k35rev
        Rexpr_2Ra = pm.Lognormal("Rexpr_2Ra", sigma=0.5, shape=1)  # Expression: IL2Ra
        Rexpr_2Rb = pm.Lognormal("Rexpr_2Rb", sigma=0.5, shape=1)  # Expression: IL2Rb
        Rexpr_15Ra = pm.Lognormal("Rexpr_15Ra", sigma=0.5, shape=1)  # Expression: IL15Ra
        Rexpr_gc = pm.Lognormal("Rexpr_gc", sigma=0.5, shape=1)  # Expression: gamma chain

        unkVec = T.concatenate((kfwd, rxnrates, nullRates, endo, activeEndo, sortF,
                                kRec, kDeg, Rexpr_2Ra, Rexpr_2Rb, Rexpr_gc,
                                Rexpr_15Ra, nullRates * 0.0))

        Y_15 = self.dst15.calc(unkVec)  # fitting the data based on dst15.calc for the given parameters
        sd_15 = T.minimum(T.std(Y_15), 0.03)  # Add bounds for the stderr to help force the fitting solution
        pm.Deterministic("Y_15", T.sum(T.square(Y_15)))
        pm.Normal("fitD_15", sigma=sd_15, observed=Y_15)  # experimental-derived stderr is used

        if self.traf:
            Y_int = self.IL2Rb.calc(unkVec)  # fitting the data based on IL2Rb surface data
            sd_int = T.minimum(T.std(Y_int), 0.02)  # Add bounds for the stderr to help force the fitting solution
            pm.Deterministic("Y_int", T.sum(T.square(Y_int)))
            pm.Normal("fitD_int", sigma=sd_int, observed=Y_int)

            Y_gc = self.gc.calc(unkVec)  # fitting the data using IL2Ra- cells
            sd_gc = T.minimum(T.std(Y_gc), 0.02)  # Add bounds for the stderr to help force the fitting solution
            pm.Deterministic("Y_gc", T.sum(T.square(Y_gc)))
            pm.Normal("fitD_gc", sigma=sd_gc, observed=Y_gc)

        # Save likelihood
        pm.Deterministic("logp", M.logpt)

    return M
def build_theano_models(self, algo, algo_params):
    epsilon = 1e-6
    kl = lambda mu, sig: sig + mu**2 - TT.log(sig)
    X, y = TT.dmatrices('X', 'y')
    params = TT.dvector('params')
    a, b, c, l_F, F, l_FC, FC = self.unpack_params(params)
    sig2_n, sig_f = TT.exp(2 * a), TT.exp(b)
    l_FF = TT.dot(X, l_F) + l_FC
    FF = TT.concatenate((l_FF, TT.dot(X, F) + FC), 1)
    Phi = TT.concatenate((TT.cos(FF), TT.sin(FF)), 1)
    Phi = sig_f * TT.sqrt(2. / self.M) * Phi
    noise = TT.log(1 + TT.exp(c))
    PhiTPhi = TT.dot(Phi.T, Phi)
    A = PhiTPhi + (sig2_n + epsilon) * TT.identity_like(PhiTPhi)
    L = Tlin.cholesky(A)
    Li = Tlin.matrix_inverse(L)
    PhiTy = Phi.T.dot(y)
    beta = TT.dot(Li, PhiTy)
    alpha = TT.dot(Li.T, beta)
    mu_f = TT.dot(Phi, alpha)
    var_f = (TT.dot(Phi, Li.T)**2).sum(1)[:, None]
    dsp = noise * (var_f + 1)
    mu_l = TT.sum(TT.mean(l_F, axis=1))
    sig_l = TT.sum(TT.std(l_F, axis=1))
    mu_w = TT.sum(TT.mean(F, axis=1))
    sig_w = TT.sum(TT.std(F, axis=1))
    hermgauss = np.polynomial.hermite.hermgauss(30)
    herm_x = Ts(hermgauss[0])[None, None, :]
    herm_w = Ts(hermgauss[1] / np.sqrt(np.pi))[None, None, :]
    herm_f = TT.sqrt(2 * var_f[:, :, None]) * herm_x + mu_f[:, :, None]
    nlk = (0.5 * herm_f**2. - y[:, :, None] * herm_f) / dsp[:, :, None] + 0.5 * (
        TT.log(2 * np.pi * dsp[:, :, None]) + y[:, :, None]**2 / dsp[:, :, None])
    enll = herm_w * nlk
    nlml = 2 * TT.log(TT.diagonal(L)).sum() + 2 * enll.sum() + 1. / sig2_n * (
        (y**2).sum() - (beta**2).sum()) + 2 * (X.shape[0] - self.M) * a
    penalty = (kl(mu_w, sig_w) * self.M + kl(mu_l, sig_l) * self.S) / (self.S + self.M)
    cost = (nlml + penalty) / X.shape[0]
    grads = TT.grad(cost, params)
    updates = getattr(OPT, algo)(self.params, grads, **algo_params)
    updates = getattr(OPT, 'apply_nesterov_momentum')(updates, momentum=0.9)
    train_inputs = [X, y]
    train_outputs = [cost, alpha, Li]
    self.train_func = Tf(train_inputs, train_outputs,
                         givens=[(params, self.params)])
    self.train_iter_func = Tf(train_inputs, train_outputs,
                              givens=[(params, self.params)], updates=updates)
    Xs, Li, alpha = TT.dmatrices('Xs', 'Li', 'alpha')
    l_FFs = TT.dot(Xs, l_F) + l_FC
    FFs = TT.concatenate((l_FFs, TT.dot(Xs, F) + FC), 1)
    Phis = TT.concatenate((TT.cos(FFs), TT.sin(FFs)), 1)
    Phis = sig_f * TT.sqrt(2. / self.M) * Phis
    mu_pred = TT.dot(Phis, alpha)
    std_pred = (noise * (1 + (TT.dot(Phis, Li.T)**2).sum(1)))**0.5
    pred_inputs = [Xs, alpha, Li]
    pred_outputs = [mu_pred, std_pred]
    self.pred_func = Tf(pred_inputs, pred_outputs,
                        givens=[(params, self.params)])
def __build_center(self):
    # We only want to compile our theano functions once
    imgv = T.dtensor3('imgv')
    # Get the mean
    u = T.mean(imgv, 0)
    # Get the standard deviation
    s = T.std(T.std(imgv, 0), 0)
    # Subtract the mean and divide by the standard deviation
    return function(inputs=[imgv], outputs=[(imgv - u) / s])
def __init__(self, inputs, channels, activation, dims=2, batch=None):
    assert dims in [2, 4, None]
    BatchNorm.c += 1
    c = BatchNorm.c
    inputs = inputs.output
    if dims == 2:
        g = np.ones((channels,), dtype=theano.config.floatX)
        b = np.zeros((channels,), dtype=theano.config.floatX)
        self.G = theano.shared(g, 'G_bn' + str(c))
        self.B = theano.shared(b, 'B_bn' + str(c))
        mean = T.mean(inputs, axis=0) if batch is None else T.mean(batch, axis=0)
        std = T.std(inputs, axis=0) if batch is None else T.std(batch, axis=0)
        self.params = [self.G, self.B]
        self.stats = [mean, std]
        A = self.G * (inputs - mean) / std + self.B
        self.output = Tool.fct[activation](A)
    elif dims == 4:
        g = np.ones((channels,), dtype=theano.config.floatX)
        b = np.zeros((channels,), dtype=theano.config.floatX)
        self.G = theano.shared(g, 'G_bn' + str(c))
        self.B = theano.shared(b, 'B_bn' + str(c))
        if batch is None:
            mean = T.mean(inputs, axis=(0, 2, 3)).dimshuffle('x', 0, 'x', 'x')
            std = T.std(inputs, axis=(0, 2, 3)).dimshuffle('x', 0, 'x', 'x')
        else:
            mean = T.mean(batch, axis=(0, 2, 3)).dimshuffle('x', 0, 'x', 'x')
            std = T.std(batch, axis=(0, 2, 3)).dimshuffle('x', 0, 'x', 'x')
        self.params = [self.G, self.B]
        self.stats = [mean, std]
        A = self.G.dimshuffle('x', 0, 'x', 'x') * (inputs - mean) / std \
            + self.B.dimshuffle('x', 0, 'x', 'x')
        self.output = Tool.fct[activation](A)
    elif dims is None:
        mean, std = None, None
        self.output = Tool.fct[activation](inputs)
        self.params = []
    self.mean = mean
    self.std = std
def CC(output, target):
    output = T.div_proxy(T.sub(output, T.mean(output)), T.std(output))
    target = T.div_proxy(T.sub(target, T.mean(target)), T.std(target))
    num = T.sub(output, T.mean(output)) * T.sub(target, T.mean(target))
    out_square = T.square(T.sub(output, T.mean(output)))
    tar_square = T.square(T.sub(target, T.mean(target)))
    CC_score = T.sum(num) / (T.sqrt(T.sum(out_square) * T.sum(tar_square)))
    # if T.isnan(CC_score):
    #     CC_score = 0
    return CC_score
def build_model(X1, X2, timeV, conv0=0.1, confl=None, apop=None, dna=None):
    """ Builds then returns the PyMC model. """
    assert X1.shape == X2.shape

    M = pm.Model()

    with M:
        conversions = conversionPriors(conv0)
        d, apopfrac = deathPriors(1)

        # parameters for drug 1, 2; assumed to be the same for both phenotypes
        hill = pm.Lognormal("hill", shape=2)
        IC50 = pm.Lognormal("IC50", shape=2)
        EmaxGrowth = pm.Beta("EmaxGrowth", 1.0, 1.0, shape=2)
        EmaxDeath = pm.Lognormal("EmaxDeath", -2.0, 0.5, shape=2)

        # E_con values; first death then growth
        GrowthCon = pm.Lognormal("GrowthCon", np.log10(0.03), 0.1)

        # Calculate the death rate
        death_rates = blissInteract(X1, X2, hill, IC50, EmaxDeath, justAdd=True)  # pylint: disable=unsubscriptable-object

        # Calculate the growth rate
        growth_rates = GrowthCon * (1 - blissInteract(X1, X2, hill, IC50, EmaxGrowth))  # pylint: disable=unsubscriptable-object
        pm.Deterministic("EmaxGrowthEffect", GrowthCon * EmaxGrowth)

        # Test the dimension of growth_rates
        growth_rates = T.opt.Assert("growth_rates did not match X1 size")(
            growth_rates, T.eq(growth_rates.size, X1.size))

        lnum, eap, deadapop, deadnec = theanoCore(timeV, growth_rates, death_rates, apopfrac, d)

        # Test the size of lnum
        lnum = T.opt.Assert("lnum did not match X1*timeV size")(
            lnum, T.eq(lnum.size, X1.size * timeV.size))

        confl_exp, apop_exp, dna_exp = convSignal(lnum, eap, deadapop, deadnec, conversions)

        # Compare to experimental observation
        if confl is not None:
            confl_obs = T.flatten(confl_exp - confl)
            pm.Normal("confl_fit", sd=T.std(confl_obs), observed=confl_obs)
            conflmean = T.mean(confl, axis=1)
            confl_exp_mean = T.mean(confl_exp, axis=1)
            pm.Deterministic("conflResid", (confl_exp_mean - conflmean) / conflmean[0])

        if apop is not None:
            apop_obs = T.flatten(apop_exp - apop)
            pm.Normal("apop_fit", sd=T.std(apop_obs), observed=apop_obs)

        if dna is not None:
            dna_obs = T.flatten(dna_exp - dna)
            pm.Normal("dna_fit", sd=T.std(dna_obs), observed=dna_obs)

    return M
def output(self, input_raw):
    input = input_raw
    lin_output = T.dot(input, self.W) + self.b

    if self.batch_norm:
        lin_output = (lin_output - T.mean(lin_output, axis=0, keepdims=True)) / (
            1.0 + T.std(lin_output, axis=0, keepdims=True))
        lin_output = (lin_output * T.addbroadcast(self.bn_std, 0) +
                      T.addbroadcast(self.bn_mean, 0))

    if self.layer_norm:
        lin_output = (lin_output - T.mean(lin_output, axis=1, keepdims=True)) / (
            1.0 + T.std(lin_output, axis=1, keepdims=True))
        lin_output = (lin_output * T.addbroadcast(self.bn_std, 0) +
                      T.addbroadcast(self.bn_mean, 0))

    if self.norm_prop:
        lin_output = lin_output / T.sqrt(T.mean(T.sqr(lin_output), axis=0))
        lin_output = (lin_output * T.addbroadcast(self.bn_std, 0) +
                      T.addbroadcast(self.bn_mean, 0))

    clip_preactive = True
    if clip_preactive:
        lin_output = theano.tensor.clip(lin_output, -10, 10)

    self.out_store = lin_output

    if self.activation is None:
        activation = lambda x: x
    elif self.activation == "relu":
        activation = lambda x: T.maximum(0.0, x)
    elif self.activation == "lrelu":
        activation = lambda x: T.nnet.relu(x, alpha=0.02)
    elif self.activation == "exp":
        activation = lambda x: T.exp(x)
    elif self.activation == "tanh":
        activation = lambda x: T.tanh(x)
    elif self.activation == 'softplus':
        activation = lambda x: T.nnet.softplus(x)
    elif self.activation == 'sigmoid':
        activation = lambda x: T.nnet.sigmoid(x)
    else:
        raise Exception("Activation not found")

    out = activation(lin_output)
    return out
def model(self, X, w1, w2, w3, w4, w5, w6, w_o, p_drop_conv, p_drop_hidden):
    l1a = l.rectify(conv2d(X, w1, border_mode='valid') + self.b1)
    l1 = max_pool_2d(l1a, (2, 2), ignore_border=True)
    # l1 = l.dropout(l1, p_drop_conv)

    l2a = l.rectify(conv2d(l1, w2, border_mode='valid') + self.b2)
    l2 = max_pool_2d(l2a, (2, 2), ignore_border=True)
    # l2 = l.dropout(l2, p_drop_conv)

    l3 = l.rectify(conv2d(l2, w3, border_mode='valid') + self.b3)
    # l3 = l.dropout(l3a, p_drop_conv)

    l4a = l.rectify(conv2d(l3, w4, border_mode='valid') + self.b4)
    l4 = max_pool_2d(l4a, (2, 2), ignore_border=True)
    # l4 = T.flatten(l4, outdim=2)
    # l4 = l.dropout(l4, p_drop_conv)

    l5 = l.rectify(conv2d(l4, w5, border_mode='valid') + self.b5)
    # l5 = l.dropout(l5, p_drop_hidden)

    l6 = l.rectify(conv2d(l5, w6, border_mode='valid') + self.b6)
    # l6 = l.dropout(l6, p_drop_hidden)
    # l6 = self.bn(l6, self.g, self.b, self.m, self.v)

    l6 = conv2d(l6, w_o, border_mode='valid')
    # l6 = self.bn(l6, self.g, self.b, T.mean(l6, axis=1), T.std(l6, axis=1))
    l6 = T.flatten(l6, outdim=2)
    # l6 = ((l6 - T.mean(l6, axis=0)) / T.std(l6, axis=0)) * self.g + self.b
    l6 = ((l6 - T.mean(l6, axis=0)) / (T.std(l6, axis=0) + 1e-4)) * self.g + self.b
    pyx = T.nnet.softmax(l6)
    return l1, l2, l3, l4, l5, l6, pyx
def __init__(self, input, shape, gamma=None, beta=None, epsilon=1e-6,
             activation_fn=None):
    self.input = input
    self.shape = shape
    rng = np.random.RandomState(45)

    if gamma is None:
        gamma_values = rng.uniform(low=-1.0, high=1.0, size=shape)\
            .astype(theano.config.floatX)
        gamma = theano.shared(name='gamma', value=gamma_values, borrow=True)

    if beta is None:
        beta_values = np.zeros(shape=shape, dtype=theano.config.floatX)\
            .astype(theano.config.floatX)
        beta = theano.shared(name='beta', value=beta_values, borrow=True)

    self.gamma = gamma
    self.beta = beta

    self.mean = T.mean(input, axis=0)
    # epsilon must be added outside the std: std(input + eps) == std(input),
    # so it only guards the division in this position
    self.std = T.std(input, axis=0) + epsilon

    l_output = T.nnet.bn.batch_normalization(input, self.gamma, self.beta,
                                             self.mean, self.std)
    self.output = (l_output if activation_fn is None
                   else activation_fn(l_output))
    self.params = [self.gamma, self.beta]
def core(self, x):
    if self.core_name == 'sigmoid':
        res = T.nnet.sigmoid(x)
    elif self.core_name == 'softplus':
        res = T.log(T.exp(x) + 1)
    elif self.core_name == 'relu':
        res = x / (T.std(x, axis=1).reshape((-1, 1)) + 1)
        res = T.switch(res > 0, res, 0)
    elif self.core_name == 'abstanh':
        res = T.abs_(T.tanh(x))
    elif self.core_name == 'tanh':
        res = T.tanh(x)
    elif self.core_name == 'linear':
        res = x * 1.0
    elif self.core_name == 'one_relu':
        res = T.switch(x < 1, x, 1)
        res = T.switch(res > -1, res, -1)
    elif self.core_name == 'strelu':
        res = x
        res = T.switch(res > 0, res, 0)
    else:
        res = T.nnet.sigmoid(x)
    return res
def core(self, x):
    if self.core_name == 'sigmoid':
        res = T.nnet.sigmoid(x)
    elif self.core_name == 'softplus':
        res = T.log(T.exp(x) + 1)
    elif self.core_name == 'relu':
        # res = x / T.switch(T.eq(T.std(x), 0), 1, T.std(x))
        res = x / (T.std(x, axis=(1, 2, 3)).dimshuffle((0, 'x', 'x', 'x')) + 1)
        res = T.switch(res > 0, res, 0)
    elif self.core_name == 'abstanh':
        res = T.abs_(T.tanh(x))
    elif self.core_name == 'tanh':
        res = T.tanh(x)
    elif self.core_name == 'linear':
        res = x
    elif self.core_name == 'one_relu':
        res = T.switch(x < 1, x, 1)
        res = T.switch(res > -1, res, -1)
    elif self.core_name == 'strelu':
        res = x
        res = T.switch(res > 0, res, 0)
    else:
        res = T.nnet.sigmoid(x)
    return res
def _train_fprop(self, state_below):
    miu = state_below.mean(axis=0)
    std = T.std(state_below, axis=0)
    # exponential moving average of the batch statistics; plain assignment,
    # since += would add the old value in on top of the weighted average
    self.moving_mean = self.mem * miu + (1 - self.mem) * self.moving_mean
    self.moving_std = self.mem * std + (1 - self.mem) * self.moving_std
    Z = (state_below - self.moving_mean) / (self.moving_std + self.epsilon)
    return self.gamma * Z + self.beta
def th_MvLDAN(data_inputs, labels):
    n_view = len(data_inputs)
    dtype = 'float32'
    mean = []
    std = []
    data = []
    for v in range(n_view):
        _data = theano.shared(data_inputs[v])
        _mean = T.mean(_data, axis=0).reshape([1, -1])
        _std = T.std(_data, axis=0).reshape([1, -1])
        _std += T.eq(_std, 0).astype(dtype)
        data.append((_data - _mean) / _std)
        mean.append(_mean)
        std.append(_std)
    Sw, Sb, _ = th_MvLDAN_Sw_Sb(data, labels)
    from theano.tensor import nlinalg
    eigvals, eigvecs = nlinalg.eig(T.dot(nlinalg.matrix_inverse(Sw), Sb))
    # evals = slinalg.eigvalsh(Sb, Sw)
    mean = list(theano.function([], mean)())
    std = list(theano.function([], std)())
    eigvals, eigvecs = theano.function([], [eigvals, eigvecs])()
    inx = np.argsort(eigvals)[::-1]
    eigvals = eigvals[inx]
    eigvecs = eigvecs[:, inx]
    W = []
    pre = 0
    for v in range(n_view):
        W.append(eigvecs[pre:pre + mean[v].shape[1], :])
        pre += mean[v].shape[1]
    return [mean, std], W, eigvals
def __init__(self, input, input_shape):
    """ Initialize the parameters of the batch normalization layer

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
                  architecture (one minibatch)
    """
    # start-snippet-1
    # initialize the scale gamma as a vector of ones of shape input_shape
    self.gamma = theano.shared(value=numpy.ones(
        input_shape, dtype=theano.config.floatX), name='gamma', borrow=True)
    # initialize the shift beta as a vector of zeros
    self.beta = theano.shared(value=numpy.zeros(
        input_shape, dtype=theano.config.floatX), name='beta', borrow=True)
    self.input = input
    self.mean = T.mean(self.input, axis=0)
    self.std = T.std(self.input, axis=0)
    self.output = T.nnet.bn.batch_normalization(self.input, self.gamma,
                                                self.beta, self.mean, self.std)
def collect_statistics(self, X):
    """Updates statistics of data"""
    stat_mean = T.mean(X, axis=0)
    stat_std = T.std(X, axis=0)
    updates_stats = [(self.stat_mean, stat_mean),
                     (self.stat_std, stat_std)]
    return updates_stats
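# Sketch of how the (shared, expression) pairs returned by
# collect_statistics() are typically consumed (assumed usage; in the
# original, self.stat_mean / self.stat_std are shared variables):
import numpy as np
import theano
import theano.tensor as T

X = T.dmatrix('X')
stat_mean = theano.shared(np.zeros(3), name='stat_mean')
stat_std = theano.shared(np.ones(3), name='stat_std')
update_fn = theano.function(
    [X], [],
    updates=[(stat_mean, T.mean(X, axis=0)), (stat_std, T.std(X, axis=0))])
update_fn(np.random.randn(64, 3))   # shared stats now hold the batch mean/std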
def apply(self, v, **kwargs):
    input = v.output
    z = T.mean(input)
    stdev = T.std(input)
    nv = vcopy(v)
    histogram = []
    buckets = self.get_buckets()
    for beg, end in buckets:
        a = T.ge(input, beg)
        b = T.lt(input, end)
        percent = T.sum(a * b) / T.prod(input.shape).astype(floatX)
        histogram.append(percent)
    r = {
        'name': self.name,
        'mean': z,
        'stdev': stdev,
        'histogram': histogram
    }
    if 'activation_monitoring' in nv:
        nv.activation_monitoring.append(r)
    else:
        nv.activation_monitoring = [r]
    return self.post_apply(nv, **kwargs)
def get_samples():
    # get samples from the model
    X, Y = T.fmatrices(2)
    givens_train_samples = {X: train_x[0:50000], Y: train_y[0:50000]}
    H1, H2 = iteration(X, 15, 0.1)
    # get prior statistics (100 mean and std)
    H2_mean = T.mean(H2, axis=0)
    H2_std = T.std(H2, axis=0)
    # sampling h2 from prior
    H2_ = RNG.normal((10000, 100), avg=H2_mean, std=4 * H2_std, ndim=None,
                     dtype=H2.dtype, nstreams=None)
    # iterative sampling from samples h2
    X_ = G1(G2(H2_))
    for i in range(3):
        H1_, H2_ = iteration(X_, 15, 0.1, 3)
        X_ = G1(H1_)
    # H1_, H2_ = iteration(X_, 1, 0.1, 3)
    # X_ = G1(H1_)
    sampling = theano.function([], X_, on_unused_input='ignore',
                               givens=givens_train_samples)
    samples = sampling()
    np.save('samples', samples)
    return samples
def output(self, input):
    input = T.specify_shape(
        input, (self.batch_size, self.in_channels, self.in_length))

    conv_out = conv1d_mc0(input, self.W,
                          image_shape=(self.batch_size, self.in_channels,
                                       self.in_length),
                          filter_shape=(self.out_channels, self.in_channels,
                                        self.filter_length),
                          subsample=(self.stride,))

    # was mb, filters, x, y
    # now mb, filters, x
    if self.batch_norm:
        conv_out = (conv_out - T.mean(conv_out, axis=(0, 2), keepdims=True)) / (
            1.0 + T.std(conv_out, axis=(0, 2), keepdims=True))

    conv_out += self.b.dimshuffle('x', 0, 'x')

    if self.activation == "relu":
        self.out = T.maximum(0.0, conv_out)
    elif self.activation == "tanh":
        self.out = T.tanh(conv_out)
    elif self.activation is None:
        self.out = conv_out

    return self.out
def calc(self, unkVec, M):
    """ Simulate the experiment with different ligand stimulations and
    compare with experimental data. """
    Op = runCkineDoseOp(tt=np.array(10.0),
                        condense=getTotalActiveSpecies().astype(np.float64),
                        conditions=self.cytokM)

    # Run the experiment
    outt = Op(unkVec)
    actVecIL4 = outt[0:self.nDoses]
    actVecIL7 = outt[self.nDoses:self.nDoses * 2]

    # normalize each actVec by its maximum
    actVecIL4 = actVecIL4 / T.max(actVecIL4)
    actVecIL7 = actVecIL7 / T.max(actVecIL7)

    actVecIL4 = T.tile(actVecIL4, (2, 1))
    actVecIL7 = T.tile(actVecIL7, (2, 1))

    # put into one vector
    actVecIL4 = T.flatten(self.dataIL4.T - actVecIL4)
    actVecIL7 = T.flatten(self.dataIL7.T - actVecIL7)
    Y_int = T.concatenate((actVecIL4, actVecIL7))

    with M:
        pm.Deterministic("Y_int", T.sum(T.square(Y_int)))
        pm.Normal("fitD_int", sigma=T.minimum(T.std(Y_int), 0.1), observed=Y_int)
def get_data_stats(all_scores):
    mean = T.switch(all_scores.shape[0], T.mean(all_scores), 0)
    # T.switch(T.sum(all_scores), T.mean(all_scores), 0)
    std = T.switch(all_scores.shape[0], T.std(all_scores), 1)
    # T.switch(T.sum(all_scores), T.std(all_scores), 1)
    std = T.switch(std, std, 1)
    return mean, std  # , T.max(all_scores), T.min(all_scores)
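# Why the T.switch guards in get_data_stats() matter (illustrative, assuming
# the function above is in scope): an empty vector would give NaN statistics
# and a constant vector gives std 0, so the fallbacks keep later z-scoring
# finite.
import numpy as np
import theano
import theano.tensor as T

scores = T.dvector('scores')
stats_fn = theano.function([scores], list(get_data_stats(scores)))
print(stats_fn(np.array([1.0, 3.0])))   # mean 2.0, std 1.0
print(stats_fn(np.array([5.0, 5.0])))   # std of 0 falls back to 1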
def correlation(input1, input2):
    n = T.shape(input1)
    n0 = n[0]
    n1 = n[1]
    s0 = T.std(input1, axis=1, keepdims=True)  # .reshape((n0, 1)), reps=n1)
    s1 = T.std(input2, axis=1, keepdims=True)  # .reshape((n0, 1)), reps=n1)
    m0 = T.mean(input1, axis=1, keepdims=True)
    m1 = T.mean(input2, axis=1, keepdims=True)
    corr = T.sum(((input1 - m0) / s0) * ((input2 - m1) / s1), axis=1) / n1
    corr = (corr + np.float32(1.)) / np.float32(2.)
    corr = T.reshape(corr, (n0,))
    return corr
def get_stats(input, stat=None):
    """
    Returns a dictionary mapping the name of the statistic to the result on
    the input. Currently gets mean, var, std, min, max, l1, l2.

    Parameters
    ----------
    input : tensor
        Theano tensor to grab stats for.

    Returns
    -------
    dict
        Dictionary of all the statistics expressions {string_name: theano expression}
    """
    stats = {
        'mean': T.mean(input),
        'var': T.var(input),
        'std': T.std(input),
        'min': T.min(input),
        'max': T.max(input),
        'l1': input.norm(L=1),
        'l2': input.norm(L=2),
        # 'num_nonzero': T.sum(T.nonzero(input)),
    }
    stat_list = raise_to_list(stat)
    compiled_stats = {}
    if stat_list is None:
        return stats

    for stat in stat_list:
        if isinstance(stat, string_types) and stat in stats:
            compiled_stats.update({stat: stats[stat]})

    return compiled_stats
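# Usage sketch for get_stats() above (assumed usage; requires the original
# module's raise_to_list helper in scope): compile selected entries of the
# returned dict into a single monitoring function.
import theano
import theano.tensor as T

x = T.dmatrix('x')
stats = get_stats(x)
monitor = theano.function([x], [stats['mean'], stats['std'], stats['l2']])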
def _build_activation(self, act=None):
    '''Given an activation description, return a callable that implements it.
    '''
    def compose(a, b):
        c = lambda z: b(a(z))
        c.__theanets_name__ = '%s(%s)' % (b.__theanets_name__, a.__theanets_name__)
        return c

    act = act or self.args.activation.lower()
    if '+' in act:
        return reduce(compose, (self._build_activation(a) for a in act.split('+')))
    options = {
        'tanh': TT.tanh,
        'linear': lambda z: z,
        'logistic': TT.nnet.sigmoid,
        'softplus': TT.nnet.softplus,

        # shorthands
        'relu': lambda z: TT.maximum(0, z),

        # modifiers
        'rect:max': lambda z: TT.minimum(1, z),
        'rect:min': lambda z: TT.maximum(0, z),

        # normalization
        'norm:dc': lambda z: (z.T - z.mean(axis=1)).T,
        'norm:max': lambda z: (z.T / TT.maximum(1e-10, abs(z).max(axis=1))).T,
        'norm:std': lambda z: (z.T / TT.maximum(1e-10, TT.std(z, axis=1))).T,
    }
    for k, v in options.iteritems():
        v.__theanets_name__ = k
    try:
        return options[act]
    except KeyError:
        raise KeyError('unknown --activation %s' % act)
def batch_normalize(Y):
    """ Set columns of Y to zero mean and unit variance. """
    Y_zmuv = (Y - T.mean(Y, axis=0, keepdims=True)) / \
        T.std(Y, axis=0, keepdims=True)
    return Y_zmuv
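# Quick check of batch_normalize() above (illustrative): each column of the
# result should have mean ~0 and std ~1.
import numpy as np
import theano
import theano.tensor as T

Y = T.dmatrix('Y')
zmuv_fn = theano.function([Y], batch_normalize(Y))
out = zmuv_fn(np.random.randn(256, 4) * 3.0 + 7.0)
print(out.mean(axis=0), out.std(axis=0))   # ~[0 ...], ~[1 ...]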
def get_stats(input, stat=None):
    """
    Returns a dictionary mapping the name of the statistic to the result on
    the input. Currently gets mean, var, std, min, max, l1, l2.

    Parameters
    ----------
    input : tensor
        Theano tensor to grab stats for.

    Returns
    -------
    dict
        Dictionary of all the statistics expressions {string_name: theano expression}
    """
    stats = {
        'mean': T.mean(input),
        'var': T.var(input),
        'std': T.std(input),
        'min': T.min(input),
        'max': T.max(input),
        'l1': input.norm(L=1),
        'l2': input.norm(L=2),
        # 'num_nonzero': T.sum(T.nonzero(input)),
    }
    stat_list = raise_to_list(stat)
    compiled_stats = {}
    if stat_list is None:
        return stats

    for stat in stat_list:
        if isinstance(stat, six.string_types) and stat in stats:
            compiled_stats.update({stat: stats[stat]})

    return compiled_stats
def setup_model():
    # shape: T x B x F
    input_ = T.tensor3('features')
    # shape: B
    target = T.lvector('targets')
    model = LSTMAttention(dim=500,
                          mlp_hidden_dims=[400, 4],
                          batch_size=100,
                          image_shape=(100, 100),
                          patch_shape=(28, 28),
                          weights_init=Glorot(),
                          biases_init=Constant(0))
    model.initialize()
    h, c, location, scale = model.apply(input_)
    classifier = MLP([Rectifier(), Softmax()], [500, 100, 10],
                     weights_init=Glorot(),
                     biases_init=Constant(0))
    model.h = h
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    error_rate = MisclassificationRate().apply(target, probabilities)

    location_x_avg = T.mean(location[:, 0])
    location_x_avg.name = 'location_x_avg'
    location_y_avg = T.mean(location[:, 1])
    location_y_avg.name = 'location_y_avg'
    scale_x_avg = T.mean(scale[:, 0])
    scale_x_avg.name = 'scale_x_avg'
    scale_y_avg = T.mean(scale[:, 1])
    scale_y_avg.name = 'scale_y_avg'

    location_x_std = T.std(location[:, 0])
    location_x_std.name = 'location_x_std'
    location_y_std = T.std(location[:, 1])
    location_y_std.name = 'location_y_std'
    scale_x_std = T.std(scale[:, 0])
    scale_x_std.name = 'scale_x_std'
    scale_y_std = T.std(scale[:, 1])
    scale_y_std.name = 'scale_y_std'

    monitorings = [error_rate,
                   location_x_avg, location_y_avg, scale_x_avg, scale_y_avg,
                   location_x_std, location_y_std, scale_x_std, scale_y_std]

    return cost, monitorings
def _layer_stats(self, state_below, layer_output):
    ls = super(PRELU, self)._layer_stats(state_below, layer_output)
    rlist = []
    rlist.append(('alpha_mean', T.mean(self.alpha)))
    rlist.append(('alpha_max', T.max(self.alpha)))
    rlist.append(('alpha_min', T.min(self.alpha)))
    rlist.append(('alpha_std', T.std(self.alpha)))
    return ls + rlist
def recurrence(x_t, h_tm1):
    a_t = T.dot(x_t, self.wx) + T.dot(h_tm1, self.wh)
    mu_t = T.mean(a_t)
    # sigma_t = T.sqrt(T.var(a_t))
    sigma_t = T.std(a_t)
    h_t = T.nnet.sigmoid((self.g / sigma_t) * (a_t - mu_t) + self.bh)
    s_t = T.nnet.softmax(T.dot(h_t, self.w) + self.b)
    return [h_t, s_t]
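# Sketch of wiring a recurrence like the one above into theano.scan (assumed
# usage; wx/wh/g/bh/w/b are hypothetical stand-ins for the self.* parameters
# of the original class). Each step layer-normalizes its pre-activation via
# (a_t - mu_t) / sigma_t before the sigmoid.
import numpy as np
import theano
import theano.tensor as T

n_in, n_h, n_out = 8, 16, 4
rng = np.random.RandomState(0)
wx = theano.shared(rng.randn(n_in, n_h) * 0.1)
wh = theano.shared(rng.randn(n_h, n_h) * 0.1)
g = theano.shared(np.ones(n_h))
bh = theano.shared(np.zeros(n_h))
w = theano.shared(rng.randn(n_h, n_out) * 0.1)
b = theano.shared(np.zeros(n_out))

def step(x_t, h_tm1):
    a_t = T.dot(x_t, wx) + T.dot(h_tm1, wh)
    h_t = T.nnet.sigmoid((g / T.std(a_t)) * (a_t - T.mean(a_t)) + bh)
    s_t = T.nnet.softmax(T.dot(h_t, w) + b)
    return [h_t, s_t]

x_seq = T.dmatrix('x_seq')                       # (time, n_in)
h0 = T.zeros((n_h,))
[h, s], _ = theano.scan(step, sequences=x_seq, outputs_info=[h0, None])
probs_fn = theano.function([x_seq], s[:, 0, :])  # softmax on a vector returns (1, n)
print(probs_fn(rng.randn(5, n_in)).shape)        # (5, 4)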
def get_output_for(self, input, **kwargs):
    input1 = input[0, ]
    input2 = input[1, ]
    n = self.input_shape
    # n0 = n[1]
    n1 = n[2]
    # tt = tuple([n0, 1])
    s0 = T.std(input1, axis=1, keepdims=True)
    s1 = T.std(input2, axis=1, keepdims=True)
    m0 = T.mean(input1, axis=1, keepdims=True)
    m1 = T.mean(input2, axis=1, keepdims=True)
    corr = T.sum(((input1 - m0) / s0) * ((input2 - m1) / s1), axis=1) / n1
    corr = (corr + np.float32(1.)) / np.float32(2.)
    return corr
def batch_norm(self, input):
    bn_mean = T.mean(input, axis=0)
    # T.nnet.batch_normalization expects the standard deviation here,
    # which is what T.std provides
    bn_std = T.std(input, axis=0)
    output = T.nnet.batch_normalization(
        input,
        self._gamma,
        self._beta,
        bn_mean,
        bn_std
    )
    return output
def cost_diversity(self):
    std = T.std(self.W, axis=1)
    mean = T.mean(self.W, axis=1, dtype=fx)
    results, _ = theano.scan(
        lambda s1, m1, s2, m2: T.log(s2 / s1) + ((s1**2 + (m1 - m2)**2) / (2 * (s2**2))),
        sequences=[std, mean],
        non_sequences=[std, mean])
    return -1.0 * T.mean(results) * self.diversity_strength
def testFcn(self, massBinned, trainY, trainX):
    y = T.dvector('y')
    varBinned = T.ivector('var')
    baseHist = T.bincount(varBinned, 1 - y) + 0.01
    selectedHist = T.bincount(varBinned, (1 - y) * self.outLayer.P[T.arange(y.shape[0]), 1]) + 0.01
    print baseHist.eval({y: trainY, varBinned: massBinned}), \
        selectedHist.eval({y: trainY, varBinned: massBinned, self.input: trainX})
    rTensor = T.std(selectedHist / baseHist)
    return (rTensor).eval({y: trainY, varBinned: massBinned, self.input: trainX})
def convlayer(tparams, state_below, options, prefix='rconv',
              activ='lambda x: tensor.tanh(x)', stride=None,
              trans_weights=False):
    # print "kernel shape", tparams[prefix + "_W"].get_value().shape[2]
    kernel_shape = tparams[prefix + "_W"].get_value().shape[2]

    if kernel_shape == 5:
        padsize = 2  # same padding for 5-wide kernels, whatever the stride
    elif kernel_shape == 1:
        padsize = 0
    else:
        raise Exception(kernel_shape)

    weights = tparams[prefix + '_W']

    if trans_weights:
        weights = weights.transpose(1, 0, 2, 3)

    if stride == -2:
        conv_out = deconv(state_below, weights.transpose(1, 0, 2, 3),
                          subsample=(2, 2), border_mode=(2, 2))
    else:
        conv_out = dnn.dnn_conv(img=state_below, kerns=weights,
                                subsample=(stride, stride),
                                border_mode=padsize, precision='float32')

    conv_out = conv_out + tparams[prefix + '_b'].dimshuffle('x', 0, 'x', 'x')

    if prefix + "_newmu" in tparams:
        batch_norm = True
        # print "using batch norm for prefix", prefix
    else:
        batch_norm = False

    if batch_norm:
        conv_out = (conv_out - T.mean(conv_out, axis=(0, 2, 3), keepdims=True)) / (
            0.01 + T.std(conv_out, axis=(0, 2, 3), keepdims=True))
        conv_out = conv_out * tparams[prefix + '_newsigma'].dimshuffle('x', 0, 'x', 'x') \
            + tparams[prefix + '_newmu'].dimshuffle('x', 0, 'x', 'x')

    conv_out = eval(activ)(conv_out)

    return conv_out
def output(self, x):
    d_0 = global_theano_rand.binomial(x.shape, p=1 - self.d_p_0, dtype=FLOATX)
    d_1 = global_theano_rand.binomial((x.shape[0], self.projection_dim),
                                      p=1 - self.d_p_1, dtype=FLOATX)

    tl_raw = T.dot(x * d_0, self.W_tl)
    hl_raw = T.dot(x * d_0, self.W_hl)
    tl_mean = T.mean(tl_raw, axis=0)
    hl_mean = T.mean(hl_raw, axis=0)
    tl_std = T.std(tl_raw, axis=0)
    hl_std = T.std(hl_raw, axis=0)
    tl = (tl_raw - tl_mean) / (tl_std + self.epsilon)
    hl = (hl_raw - hl_mean) / (hl_std + self.epsilon)

    new_Mean_tl = self.tau * tl_mean + (1.0 - self.tau) * self.Mean_tl
    new_Mean_hl = self.tau * hl_mean + (1.0 - self.tau) * self.Mean_hl
    new_Std_tl = self.tau * tl_std + (1.0 - self.tau) * self.Std_tl
    new_Std_hl = self.tau * hl_std + (1.0 - self.tau) * self.Std_hl

    tr_raw = (tl * d_1).dot(self.W_tr)
    hr_raw = (hl * d_1).dot(self.W_hr)
    tr_mean = T.mean(tr_raw, axis=0)
    hr_mean = T.mean(hr_raw, axis=0)
    tr_std = T.std(tr_raw, axis=0)
    hr_std = T.std(hr_raw, axis=0)
    tr = (tr_raw - tr_mean) / (tr_std + self.epsilon)
    hr = (hr_raw - hr_mean) / (hr_std + self.epsilon)

    new_Mean_tr = self.tau * tr_mean + (1.0 - self.tau) * self.Mean_tr
    new_Mean_hr = self.tau * hr_mean + (1.0 - self.tau) * self.Mean_hr
    new_Std_tr = self.tau * tr_std + (1.0 - self.tau) * self.Std_tr
    new_Std_hr = self.tau * hr_std + (1.0 - self.tau) * self.Std_hr

    t = T.nnet.sigmoid(tr * self.S_t + self.B_t)
    h = self._act(hr * self.S_h + self.B_h)
    rv = h * t + x * (1 - t)

    self.register_training_updates(
        (self.Mean_tl, new_Mean_tl), (self.Mean_hl, new_Mean_hl),
        (self.Mean_tr, new_Mean_tr), (self.Mean_hr, new_Mean_hr),
        (self.Std_tl, new_Std_tl), (self.Std_hl, new_Std_hl),
        (self.Std_tr, new_Std_tr), (self.Std_hr, new_Std_hr))

    return rv
def get_output_for(self, input, **kwargs):
    # compute featurewise mean and std for the minibatch
    orig_shape = input.shape
    temp = T.reshape(input, (-1, orig_shape[-1]))
    # statistics must come from the flattened view so they broadcast
    # against it (one value per feature)
    means = T.mean(temp, 0, dtype=input.dtype)
    stds = T.std(temp, 0)
    temp = (temp - means) / stds
    input = T.reshape(temp, orig_shape)
    return input
def zScoreNormalization(self, X_data):
    # T.std takes no dtype argument (unlike T.mean), so cast its result instead
    f = function([], [T.mean(self.out, axis=0, dtype='float32'),
                      T.std(self.out, axis=0).astype('float32')],
                 givens=[(self.X, X_data)])
    mean, std = f()
    std += (std < 1e-5)
    self.out = (self.out - mean) / std
def get_output_for(self, input, **kwargs):
    output_shape = input.shape
    if input.ndim > 2:
        input = T.flatten(input, 2)

    if self.norm_type == "mean_var":
        input -= T.mean(input, axis=1, keepdims=True)
        input /= T.std(input, axis=1, keepdims=True)

    input = input.reshape(output_shape)
    return input
def batch_norm(self, h, dim, use_shift=True, use_std=True):
    bn = (h - T.mean(h, axis=1, keepdims=True)) / (
        T.std(h, axis=1, keepdims=True) + numpy.float32(1e-10))
    if use_std:
        gamma = self.add_param(self.shared(
            numpy.zeros((dim,), 'float32') + numpy.float32(0.1),
            "%s_gamma" % h.name))
        bn *= gamma.dimshuffle('x', 'x', 0).repeat(h.shape[0], axis=0).repeat(h.shape[1], axis=1)
    if use_shift:
        beta = self.add_param(self.shared(numpy.zeros((dim,), 'float32'),
                                          "%s_beta" % h.name))
        bn += beta
    return bn
def __call__(self, x, *args):
    if self.normalize:
        W = self.g.dimshuffle(0, 'x', 'x', 'x') * \
            (self.W - self.W.mean(axis=[1, 2, 3]).dimshuffle(0, 'x', 'x', 'x')) / \
            T.sqrt(T.sum(self.W**2, axis=[1, 2, 3])).dimshuffle(0, 'x', 'x', 'x')
    else:
        W = self.W

    # print("conv call:", x, W, self.mode, self.stride)
    if self.cudnn:
        conv_out = dnn_conv(x, W, self.mode, self.stride)
    else:
        if self.mode == 'half' and 'cpu' in theano.config.device:
            fso = self.filter_shape[2] - 1
            nps = x.shape[2]
            conv_out = conv.conv2d(input=x, filters=W,
                                   filter_shape=self.filter_shape,
                                   border_mode='full',
                                   subsample=self.stride)[:, :, fso:nps + fso, fso:nps + fso]
        else:
            conv_out = conv.conv2d(
                input=x,
                filters=W,
                filter_shape=self.filter_shape,
                border_mode=self.mode,
                subsample=self.stride,
                # image_shape=self.image_shape if image_shape is None else image_shape
            )

    if self.normalize and not shared.isJustReloadingModel:
        mu = T.mean(conv_out, axis=[0, 2, 3]).eval({shared.init_tensor_x: shared.init_minibatch_x})
        sigma = T.std(conv_out, axis=[0, 2, 3]).eval({shared.init_tensor_x: shared.init_minibatch_x})
        print("normalizing:", mu.mean(), sigma.mean())
        self.g.set_value(1 / sigma)
        self.b.set_value(-mu / sigma)

    if hasattr(shared, 'preactivations'):
        shared.preactivations.append(conv_out)

    if 0:  # mean-norm
        conv_out = conv_out - conv_out.mean(axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')

    if self.use_bias:
        out = self.activation(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
    else:
        out = self.activation(conv_out)

    # print("out:", out.tag.test_value.shape)
    return out
def output(self, input=None, dropout_active=True, *args, **kwargs):
    mean = T.mean(self.input_layer.output(), axis=(0, 2, 3), keepdims=True)
    std = T.std(self.input_layer.output(), axis=(0, 2, 3), keepdims=True)
    x = (self.input_layer.output() - mean) / (std + self.epsilon)
    gamma = self.gamma.dimshuffle('x', 0, 'x', 'x')
    beta = self.beta.dimshuffle('x', 0, 'x', 'x')
    y = gamma * x + beta
    output = y
    return output
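# Illustrative re-creation of the per-channel statistics used in output()
# above (a standalone sketch, not the original layer): with axis=(0, 2, 3)
# and keepdims=True, mean/std broadcast across batch and spatial dimensions,
# giving one statistic per channel.
import numpy as np
import theano
import theano.tensor as T

x = T.dtensor4('x')
mean = T.mean(x, axis=(0, 2, 3), keepdims=True)
std = T.std(x, axis=(0, 2, 3), keepdims=True)
norm_fn = theano.function([x], (x - mean) / (std + 1e-5))
print(norm_fn(np.random.randn(2, 3, 8, 8)).shape)   # (2, 3, 8, 8)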
def normal_ml(node, sample, weights):
    rstate, shape, mu, sigma = node.inputs
    eps = 1e-8
    if weights is None:
        new_mu = tensor.mean(sample)
        new_sigma = tensor.std(sample)
    else:
        denom = tensor.maximum(tensor.sum(weights), eps)
        new_mu = tensor.sum(sample * weights) / denom
        new_sigma = tensor.sqrt(
            tensor.sum(weights * (sample - new_mu) ** 2) / denom)
    return Updates({mu: new_mu, sigma: new_sigma})
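# A small numpy check of the weighted ML estimates used in normal_ml() above
# (illustrative only): the weighted mean and the weighted (biased) standard
# deviation, with the same eps guard on the denominator.
import numpy as np

sample = np.array([1.0, 2.0, 4.0])
weights = np.array([1.0, 1.0, 2.0])
denom = max(weights.sum(), 1e-8)
mu = (sample * weights).sum() / denom                           # 2.75
sigma = np.sqrt((weights * (sample - mu) ** 2).sum() / denom)   # ~1.299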
def pre_process_data(x_shared):
    idx = T.iscalar('index')
    sym_x = T.dmatrix('X')

    if 'whiten_per_image' in preprocess:
        # reshape expects a shape tuple: (-1, 1) makes column vectors that
        # broadcast against the rows of sym_x
        pp_sym_x = (sym_x - T.mean(sym_x, axis=1).reshape((-1, 1))) / \
            T.std(sym_x, axis=1).reshape((-1, 1))
        print(x_shared.eval().shape)
        test_whitening = theano.function(inputs=[idx],
                                         outputs=pp_sym_x,
                                         givens={sym_x: x_shared[idx:idx + 2]})
        plt.imshow(test_whitening(1)[0])
def get_monitoring_channels(self, model, data, **kwargs):
    chans = super(FunnelGSNCost, self) \
        .get_monitoring_channels(model, data, **kwargs)

    output = self._get_samples_from_model(model, data)

    # axes: 0: time step, 1: item in minibatch, 2: sample component
    samples = T.stack(*list(itertools.chain(*output)))

    # only want first component of each sample
    # axis 0: time step, axis 1: item in minibatch
    samples = samples[:, :, 0]

    # sigma^2 = 9.0
    chans['x_std'] = T.std(data[0][:, 0])

    likelihood = T.exp(-T.sqr(samples) / 18.0) / T.sqrt(18.0 * math.pi)
    # chans['y_ll'] = T.sum(T.log(likelihood))

    chans['y_mean'] = T.mean(samples)
    chans['y_std'] = T.std(samples)

    return chans
def create_activation(activation):
    '''Given an activation description, return a callable that implements it.

    Parameters
    ----------
    activation : string
        A string description of an activation function to use.

    Returns
    -------
    activation : callable(float) -> float
        A callable activation function.
    '''
    def compose(a, b):
        c = lambda z: b(a(z))
        c.__theanets_name__ = '%s(%s)' % (b.__theanets_name__, a.__theanets_name__)
        return c

    if '+' in activation:
        return functools.reduce(
            compose, (create_activation(a) for a in activation.split('+')))
    options = {
        'tanh': TT.tanh,
        'linear': lambda z: z,
        'logistic': TT.nnet.sigmoid,
        'sigmoid': TT.nnet.sigmoid,
        'softplus': TT.nnet.softplus,
        'softmax': softmax,

        # rectification
        'relu': lambda z: TT.maximum(0, z),
        'trel': lambda z: TT.maximum(0, TT.minimum(1, z)),
        'trec': lambda z: TT.maximum(1, z),
        'tlin': lambda z: z * (abs(z) > 1),

        # modifiers
        'rect:max': lambda z: TT.minimum(1, z),
        'rect:min': lambda z: TT.maximum(0, z),

        # normalization
        'norm:dc': lambda z: z - z.mean(axis=-1, keepdims=True),
        'norm:max': lambda z: z / TT.maximum(TT.cast(1e-7, FLOAT),
                                             abs(z).max(axis=-1, keepdims=True)),
        'norm:std': lambda z: z / TT.maximum(TT.cast(1e-7, FLOAT),
                                             TT.std(z, axis=-1, keepdims=True)),
        'norm:z': lambda z: (z - z.mean(axis=-1, keepdims=True)) / TT.maximum(
            TT.cast(1e-7, FLOAT), z.std(axis=-1, keepdims=True)),
    }
    for k, v in options.items():
        v.__theanets_name__ = k
    try:
        return options[activation.lower()]
    except KeyError:
        raise KeyError('unknown activation {}'.format(activation))
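# Illustrative use of create_activation() above (assumes the defining
# module's context, where FLOAT and softmax are available): '+' composes
# stages left-to-right via compose(), so 'relu+norm:z' rectifies first and
# then z-normalizes along the last axis.
import numpy as np
import theano
import theano.tensor as TT

act = create_activation('relu+norm:z')
z = TT.dmatrix('z')
act_fn = theano.function([z], act(z))
out = act_fn(np.random.randn(3, 6))
print(out.mean(axis=-1))   # ~0 per row: rectified, then z-normalized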
def build_model(self):
    X, Y, w1, w2, w3, w4, w5, w6, w_o = (self.X, self.Y, self.w1, self.w2,
                                         self.w3, self.w4, self.w5, self.w6,
                                         self.w_o)
    b1, b2, b3, b4, b5, b6 = self.b1, self.b2, self.b3, self.b4, self.b5, self.b6
    g, b = self.g, self.b

    l1, l2, l3, l4, l5, l6, py_x = self.model(X, w1, w2, w3, w4, w5, w6, w_o, 0., 0.)
    cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
    params = [w1, w2, w3, w4, w5, w6, w_o, g, b, b1, b2, b3, b4, b5, b6]
    updates, grads = l.RMSprop(cost, params, lr=0.001)
    self.update_running_mean_std(updates, T.mean(l6, axis=0), T.std(l6, axis=0))
    self.train = theano.function(inputs=[X, Y],
                                 outputs=[cost, T.sum((grads)[0]), l1],
                                 updates=updates,
                                 allow_input_downcast=True)

    py_x = self.test_model(X, w1, w2, w3, w4, w5, w6, w_o, 0., 0.)
    y_x = T.argmax(py_x, axis=1)
    self.predict = theano.function(inputs=[X], outputs=y_x,
                                   allow_input_downcast=True)
    print "Done building the model..."
def build_model_util(self, alpha, w_o, g, b, r_m, r_s):
    X, Y, w1, w2, w3, w4, w5, w6 = (self.X, self.Y, self.w1, self.w2,
                                    self.w3, self.w4, self.w5, self.w6)
    b1, b2, b3, b4, b5, b6 = self.b1, self.b2, self.b3, self.b4, self.b5, self.b6

    l6, py_x = self.model(X, w1, w2, w3, w4, w5, w6, w_o, g, b)
    cost = alpha * T.mean(T.nnet.categorical_crossentropy(py_x, Y))
    params = [w1, w2, w3, w4, w5, w6, w_o, g, b, b1, b2, b3, b4, b5, b6]
    updates, grads = l.RMSprop(cost, params, lr=0.01)
    self.update_running_mean_std(updates, r_m, r_s, T.mean(l6, axis=0), T.std(l6, axis=0))
    train = theano.function(inputs=[X, Y], outputs=cost, updates=updates,
                            allow_input_downcast=True)

    py_x = self.test_model(X, w1, w2, w3, w4, w5, w6, w_o, r_m, r_s, g, b)
    cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
    y_x = T.argmax(py_x, axis=1)
    predict = theano.function(inputs=[X, Y], outputs=[y_x, cost],
                              allow_input_downcast=True)

    return train, predict
def fprop(self, x, isTest=False):
    ret = x
    if not isTest:
        norm_axis = (1,) + tuple(range(2, len(self.inputShape)))
        x_avg = T.mean(x, axis=norm_axis, keepdims=True)
        x_std = T.std(x, axis=norm_axis, keepdims=True)
        ret = (x - x_avg) / (x_std + 1e-4)
    return ret
# End BatchNormLayer
# ------------------------------------------------------------------------------
def _build_activation(self, act=None):
    '''Given an activation description, return a callable that implements it.

    Parameters
    ----------
    activation : string
        A string description of an activation function to use.

    Returns
    -------
    callable(float) -> float :
        A callable activation function.
    '''
    def compose(a, b):
        c = lambda z: b(a(z))
        c.__theanets_name__ = '%s(%s)' % (b.__theanets_name__, a.__theanets_name__)
        return c

    if '+' in act:
        return functools.reduce(
            compose, (self._build_activation(a) for a in act.split('+')))
    options = {
        'tanh': TT.tanh,
        'linear': lambda z: z,
        'logistic': TT.nnet.sigmoid,
        'sigmoid': TT.nnet.sigmoid,
        'softplus': TT.nnet.softplus,
        'softmax': softmax,

        # shorthands
        'relu': lambda z: z * (z > 0),
        'trel': lambda z: z * (z > 0) * (z < 1),
        'trec': lambda z: z * (z > 1),
        'tlin': lambda z: z * (abs(z) > 1),

        # modifiers
        'rect:max': lambda z: TT.minimum(1, z),
        'rect:min': lambda z: TT.maximum(0, z),

        # normalization
        'norm:dc': lambda z: (z.T - z.mean(axis=1)).T,
        'norm:max': lambda z: (z.T / TT.maximum(1e-10, abs(z).max(axis=1))).T,
        'norm:std': lambda z: (z.T / TT.maximum(1e-10, TT.std(z, axis=1))).T,
    }
    for k, v in options.items():
        v.__theanets_name__ = k
    try:
        return options[act]
    except KeyError:
        raise KeyError('unknown activation %r' % act)
def output(self, x):
    x_mean = T.mean(x, axis=0)
    x_std = T.std(x, axis=0)
    rv = (x - x_mean) / (x_std + self.epsilon)

    if self.with_scale:
        rv = rv * self.S
    if self.with_bias:
        rv = rv + self.B

    new_mean = self.tau * x_mean + (1.0 - self.tau) * self.Mean
    new_std = self.tau * x_std + (1.0 - self.tau) * self.Std
    self.register_training_updates((self.Mean, new_mean),
                                   (self.Std, new_std))
    return rv