def beta_H_Sparse(X, W, H, beta, l_sp):
    """Multiplicative update of the activations with a sparsity term.

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    H : Theano tensor
        activation matrix
    beta : Theano scalar
        exponent of the beta divergence
    l_sp :
        sparsity weight, added to the denominator of the update

    Returns
    -------
    H : Theano tensor
        Updated version of the activations
    """
    approx = T.dot(H, W.T)
    # beta == 2 needs no element-wise powers of the reconstruction.
    euclidean = (T.dot(X, W)) / (T.dot(approx, W) + l_sp)
    generic = ((T.dot(T.mul(T.power(approx, (beta - 2)), X), W)) /
               (T.dot(T.power(approx, (beta - 1)), W) + l_sp))
    return T.mul(H, ifelse(T.eq(beta, 2), euclidean, generic))
def W_beta_sub_withcst(X, W, Wsub, H, Hsub, beta, sum_grp, lambda_grp,
                       card_grp):
    """Multiplicative update of a group of bases with a group constraint.

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    Wsub : Theano tensor
        group Bases
    H : Theano tensor
        activation matrix
    Hsub : Theano tensor
        group activation matrix
    beta : Theano scalar
        exponent of the beta divergence
    sum_grp :
        term added to the numerator, scaled by ``lambda_grp``
    lambda_grp :
        weight of the constraint terms
    card_grp :
        factor applied to ``lambda_grp * Wsub`` in the denominator

    Returns
    -------
    Wsub : Theano tensor
        Updated version of the group bases
    """
    approx = T.dot(H, W.T)
    numer_cst = lambda_grp * sum_grp
    denom_cst = lambda_grp * card_grp * Wsub
    euclidean = ((T.dot(X.T, Hsub) + numer_cst) /
                 (T.dot(approx.T, Hsub) + denom_cst))
    generic = ((T.dot(T.mul(T.power(approx, (beta - 2)), X).T, Hsub) +
                numer_cst) /
               (T.dot(T.power(approx, (beta - 1)).T, Hsub) + denom_cst))
    return T.mul(Wsub, ifelse(T.eq(beta, 2), euclidean, generic))
def ion_O2_7319A_b_flux_log(self, emis_ratio, cHbeta, flambda, abund, ftau, O3, T_high):
    """Return log10 of the sum of two power-of-ten flux contributions.

    The first contribution is ``10 ** (abund + emis_ratio - flambda * cHbeta - 12)``.
    The second uses ``O3``, the constant 0.9712758 and
    ``log10((T_high / 10000) ** 0.44)`` with the same ``flambda * cHbeta``
    and ``- 12`` offsets. ``ftau`` is accepted but not used.
    """
    reddening = flambda * cHbeta
    temperature_term = tt.log10(tt.power(T_high / 10000.0, 0.44))
    col_ext = tt.power(10, abund + emis_ratio - reddening - 12)
    recomb = tt.power(10, O3 + 0.9712758 + temperature_term - reddening - 12)
    return tt.log10(col_ext + recomb)
def beta_H_groupSparse(X, W, H, beta, l_sp, start, stop):
    """Update activation with beta divergence and a group-sparsity term

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    H : Theano tensor
        activation matrix
    beta : Theano scalar
    l_sp :
        weight applied to the group term in the denominator
    start, stop :
        sequences scanned in lockstep; each pair selects the column
        range ``start_i:stop_i`` of H that forms one group

    Returns
    -------
    H : Theano tensor
        Updated version of the activations
    """
    # For every (start_i, stop_i) pair, write the H columns of that group,
    # divided by the 2-norm taken along axis 1 of the same slice, into the
    # running result (initialised to zeros shaped like H).
    # NOTE(review): both the target slice and the step result are
    # transposed with `.T`; confirm the resulting shapes against a caller.
    results, _ = theano.scan(fn=lambda start_i, stop_i, prior_results, H:
                             T.set_subtensor(
                                prior_results[:, start_i:stop_i].T,
                                H[:, start_i:stop_i].T /
                                H[:, start_i:stop_i].norm(2, axis=1)).T,
                             outputs_info=T.zeros_like(H),
                             sequences=[start, stop],
                             non_sequences=H)
    # Only the last scan step holds the contribution of every group.
    cst = results[-1]
    # beta == 2 uses the closed form without element-wise powers.
    up = ifelse(T.eq(beta, 2),
                (T.dot(X, W)) / (T.dot(T.dot(H, W.T), W) + l_sp * cst),
                (T.dot(T.mul(T.power(T.dot(H, W.T), (beta - 2)), X), W)) /
                (T.dot(T.power(T.dot(H, W.T), (beta-1)), W) + l_sp * cst))
    return T.mul(H, up)
def beta_div(X, W, H, beta):
    """Compute beta divergence D(X|WH)

    The reconstruction is built as ``T.dot(H, W)``, so ``W`` must be
    oriented such that this product has the same shape as ``X``.

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    H : Theano tensor
        activation matrix
    beta : Theano scalar
        exponent of the divergence; 2, 0 and 1 use dedicated closed forms

    Returns
    -------
    div : Theano scalar
        beta divergence D(X|WH)"""
    approx = T.dot(H, W)
    # Generic case: the cross term is X * approx**(beta - 1). Raising the
    # product X * approx to (beta - 1) would make D(X|X) non-zero.
    generic = T.sum(1. / (beta * (beta - 1.)) *
                    (T.power(X, beta) +
                     (beta - 1.) * T.power(approx, beta) -
                     beta * T.mul(X, T.power(approx, (beta - 1)))))
    div = ifelse(
        T.eq(beta, 2),
        T.sum(1. / 2 * T.power(X - approx, 2)),
        ifelse(
            T.eq(beta, 0),
            T.sum(X / approx - T.log(X / approx) - 1),
            ifelse(
                T.eq(beta, 1),
                T.sum(T.mul(X, (T.log(X) - T.log(approx))) + approx - X),
                generic)))
    return div
def beta_div(X, W, H, beta):
    """Compute beta divergence D(X|WH)

    The reconstruction is built as ``T.dot(H, W)``, so ``W`` must be
    oriented such that this product has the same shape as ``X``.

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    H : Theano tensor
        activation matrix
    beta : Theano scalar
        exponent of the divergence; 2, 0 and 1 use dedicated closed forms

    Returns
    -------
    div : Theano scalar
        beta divergence D(X|WH)"""
    approx = T.dot(H, W)
    # Generic case: the cross term is X * approx**(beta - 1). Raising the
    # product X * approx to (beta - 1) would make D(X|X) non-zero.
    generic = T.sum(1. / (beta * (beta - 1.)) *
                    (T.power(X, beta) +
                     (beta - 1.) * T.power(approx, beta) -
                     beta * T.mul(X, T.power(approx, (beta - 1)))))
    div = ifelse(
        T.eq(beta, 2),
        T.sum(1. / 2 * T.power(X - approx, 2)),
        ifelse(
            T.eq(beta, 0),
            T.sum(X / approx - T.log(X / approx) - 1),
            ifelse(
                T.eq(beta, 1),
                T.sum(T.mul(X, (T.log(X) - T.log(approx))) + approx - X),
                generic)))
    return div
def leaky_beta_asymmetric(o, t, o2, v, alpha, beta, d, omega, tau_p, tau_n):
    """
    Forgetful beta model with asymmetric updating

    Args:
        o: Trial outcome
        t: Time (not used)
        o2: Outcome of the other stimulus
        v: Previous trial value estimate (not used)
        alpha: Starting alpha
        beta: Starting beta
        d: Decay (forgetting) rate
        omega: Weight of the other stimulus outcome
        tau_p: Positive update weight
        tau_n: Negative update weight

    Returns:
        Mean: Estimated probability on the current trial (mean of beta distribution)
        Alpha: Alpha value on current trial
        Beta: Beta value on current trial
        Var: Variance of beta distribution
    """
    retain = 1 - d
    # Both parameters are floored at 0.1 ** 10.
    floor = T.power(0.1, 10)

    alpha = T.maximum(floor, retain * alpha + (o * tau_p) + (omega * o2))
    beta = T.maximum(floor, retain * beta + ((1 - o) * tau_n) + (omega * (1 - o2)))

    total = alpha + beta
    value = alpha / total
    var = (alpha * beta) / (T.pow(total, 2) * (total + 1))
    return (value, alpha, beta, var)
def logp(self, value):
    r"""
    Calculate log-probability of DiscreteWeibull distribution at specified value.

    The probability is computed as ``q ** (value ** beta) - q ** ((value + 1) ** beta)``
    and bounded by ``0 <= value``, ``0 < q < 1`` and ``0 < beta``.

    Parameters
    ----------
    value: numeric
        Value(s) for which log-probability is calculated. If the log probabilities for multiple
        values are desired the values must be provided in a numpy array or theano tensor

    Returns
    -------
    TensorVariable
    """
    q, beta = self.q, self.beta
    term_at_value = tt.power(q, tt.power(value, beta))
    term_at_next = tt.power(q, tt.power(value + 1, beta))
    return bound(
        tt.log(term_at_value - term_at_next),
        0 <= value,
        0 < q,
        q < 1,
        0 < beta,
    )
def H_beta_sub(X, W, Wsub, H, Hsub, beta):
    """Multiplicative update of a group of activations with beta divergence.

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    Wsub : Theano tensor
        group Bases
    H : Theano tensor
        activation matrix
    Hsub : Theano tensor
        group activation matrix
    beta : Theano scalar
        exponent of the beta divergence

    Returns
    -------
    Hsub : Theano tensor
        Updated version of the group activations
    """
    approx = T.dot(H, W.T)
    euclidean = (T.dot(X, Wsub)) / (T.dot(approx, Wsub))
    generic = ((T.dot(T.mul(T.power(approx, (beta - 2)), X), Wsub)) /
               (T.dot(T.power(approx, (beta - 1)), Wsub)))
    return T.mul(Hsub, ifelse(T.eq(beta, 2), euclidean, generic))
def logp(self, value):
    """Return the bounded log of ``q**(value**beta) - q**((value + 1)**beta)``.

    The result is passed through ``bound`` with the conditions
    ``0 <= value``, ``0 < q``, ``q < 1`` and ``0 < beta``.
    """
    q, beta = self.q, self.beta
    mass = (tt.power(q, tt.power(value, beta))
            - tt.power(q, tt.power(value + 1, beta)))
    conditions = (0 <= value, 0 < q, q < 1, 0 < beta)
    return bound(tt.log(mass), *conditions)
def my_activation(input):
    """Apply ``x / (1 + |x|)`` to the input rounded to 5 decimal places.

    The input is scaled by ``10 ** 5``, rounded and scaled back before the
    activation is applied.
    """
    d = 5
    scale = T.power(10, d)
    x = T.round(input * scale) / scale
    # The tensor module names its absolute-value function `abs_`, so
    # `T.abs` fails; the builtin dispatches to the tensor's `__abs__`.
    return x / (1. + abs(x))
def __init__(self, n_inputs=1024, n_classes=10, n_hidden_nodes=100, alpha=0.1, lr=0.05, n_epoch=200,
             activation='sigmoid'):
    """
    Build a one-hidden-layer network in Theano and compile its train/predict functions.

    :param n_hidden_nodes: Number of nodes in the hidden layer
    :param alpha: the coefficient for L-2 weight regularization
    :param lr: step size applied to the gradients in the weight updates
    :param n_epoch: Number of training epochs for SGD. Default: 200
    :param activation: Choice of activation method among ['sigmoid', 'relu', 'linear']. Default: 'sigmoid'.
        Any value other than 'sigmoid' or 'relu' yields a linear hidden layer.
    :param n_inputs: number of inputs (hard coded for assignment)
    :param n_classes: number of output nodes (hard coded for assignment)
    """
    self.activation = activation
    self.n_epoch = n_epoch
    self.n_hidden_nodes = n_hidden_nodes
    self.n_inputs = n_inputs
    self.n_classes = n_classes

    # Symbolic inputs
    X = T.matrix('X')
    y = T.matrix('y')

    # Weights start uniformly in [-0.5, 0.5); hidden layer is drawn first.
    w_hidden = theano.shared(name="W_hidden",
                             value=floatX(np.random.rand(self.n_inputs, self.n_hidden_nodes) - 0.5))
    w_output = theano.shared(name="W_output",
                             value=floatX(np.random.rand(self.n_hidden_nodes, self.n_classes) - 0.5))
    self.layers = [w_hidden, w_output]

    self.lr = theano.shared(floatX(lr))
    self.alpha = theano.shared(floatX(alpha))

    # Forward pass up to the pre-softmax output
    pre_activation = T.dot(X, self.layers[0])
    if self.activation == 'sigmoid':
        hidden = T.nnet.sigmoid(pre_activation)
    elif self.activation == 'relu':
        hidden = T.nnet.relu(pre_activation)
    else:
        hidden = pre_activation
    self.fprop = T.dot(hidden, self.layers[1])

    # TODO check L2 formula
    self.regularization = 0.5 * self.alpha * T.sum(T.power(self.layers[0], 2)) + \
                          0.5 * self.alpha * T.sum(T.power(self.layers[1], 2))

    # Mean squared error between the softmax output and the targets
    self.loss = T.mean((T.nnet.softmax(self.fprop) - y) ** 2) + self.regularization

    # Plain gradient-descent step for each weight matrix
    self.update = [
        (layer, layer - T.grad(cost=self.loss, wrt=layer) * self.lr)
        for layer in (self.layers[0], self.layers[1])
    ]

    self.fit = theano.function(inputs=[X, y], outputs=self.loss, updates=self.update,
                               allow_input_downcast=True)

    self.predict_ = theano.function(inputs=[X],
                                    outputs=T.argmax(T.nnet.softmax(self.fprop), axis=1),
                                    allow_input_downcast=True)
def log_logistic_sigmoid(x, mu, tau=1.0, eps=1e-6):
    """
    Compute the log-density used for a relaxed (Gumbel-Softmax style) binary
    variable at values x, with location ``mu`` and temperature ``tau``.

    ``mu`` is clipped to [-10, 10] and ``eps`` is added inside every
    logarithm.

    .. See Appendix B.[1:2] https://arxiv.org/pdf/1611.01144v2.pdf
    """
    mu = T.clip(mu, -10., 10.)
    log_temperature = T.log(tau + eps)
    log_support = T.log(x + eps) + T.log(1. - x + eps)
    log_normaliser = T.log(T.exp(mu) * T.power(x, -tau) + T.power(1. - x, -tau) + eps)
    logpdf = mu + log_temperature - (tau + 1.) * log_support - 2. * log_normaliser
    return logpdf
def my_activation(input):
    """Apply ``x / (1 + |x|)`` with 2-decimal rounding of input and output.

    The input is rounded to 2 decimals, passed through the activation, and
    the result is rounded to 2 decimals again.
    """
    d = 2
    factor = T.power(10, d)
    x = T.round(input * factor) / factor
    squashed = x / (1. + abs(x))
    return T.round(squashed * factor) / factor
def distance_loss(coding_dist, true_dist):
    """Margin-style loss between predicted codes and one-hot style targets.

    Both arguments must have the same rank. Rows of ``coding_dist`` are
    normalised by their L2 norm (floored at 1e-12). The loss per row is
    ``1 + |p_true - 1| - |p_false_max - 1|``, where ``p_true`` is the
    normalised value at the true label and ``p_false_max`` is the largest
    normalised value among the remaining labels.

    Returns
    -------
    tuple
        ``(loss, coding_dist_true2inf, true_pre, loss)``: the loss, the
        largest false-label value per row, the un-normalised true-label
        value per row, and the loss again.

    Raises
    ------
    TypeError
        If ``true_dist`` and ``coding_dist`` do not have the same rank.
    """
    def set_inf_in2dim(j, coding_dist, true_label_id):
        """Return "-inf" where ``true_label_id == j``, else ``coding_dist[j]``."""
        return T.switch(T.eq(j, true_label_id),
                        T.constant(float("-inf")),
                        coding_dist[j])

    def set_inf_in1dim(i, coding_dist, true_label_id):
        """Mask the true label of row ``i`` with "-inf"."""
        # coding_dist[:,label_id] doesn't become "-0.0"
        loss_margin, updates = theano.scan(set_inf_in2dim,
                                           outputs_info=None,
                                           sequences=T.arange(coding_dist.shape[1]),
                                           non_sequences=[coding_dist[i], true_label_id[i]])
        return loss_margin

    def compare_max(l2_norm, coding_dist):
        """Floor every entry of ``l2_norm`` at 1e-12."""
        result, updates = theano.scan(
            lambda i, x: T.switch(T.le(x[i], T.constant(1e-12)), T.constant(1e-12), x[i]),
            outputs_info=None,
            sequences=T.arange(coding_dist.shape[0]),
            non_sequences=[l2_norm])
        return result

    if true_dist.ndim != coding_dist.ndim:
        # Without this guard the function would fall through with no loss defined.
        raise TypeError('rank mismatch between coding and true distributions')

    # L2-norm
    l2_norm = T.sqrt(T.sum(T.power(coding_dist, 2), axis=1))
    l2_norm = compare_max(l2_norm, coding_dist)

    # label-norm
    # Calculation: prediction to true_label
    true_pre = T.sum(true_dist * coding_dist, axis=1)
    y_pre2true = T.sqrt(T.power((true_pre / l2_norm) - 1, 2))

    # search the true label id
    true_label_id = T.argmax(true_dist, axis=1)

    # persist the false label in coding_dist
    # Broadcast the per-row norm as a column so any batch size works
    # (the batch size used to be fixed at 100 here).
    coding_dist = coding_dist / l2_norm.dimshuffle(0, 'x')
    coding_dist = (1 - true_dist) * coding_dist

    # set true label to "-inf"
    coding_dist_true2inf, updates = theano.scan(set_inf_in1dim,
                                                outputs_info=None,
                                                sequences=T.arange(coding_dist.shape[0]),
                                                non_sequences=[coding_dist, true_label_id])
    # search the max in false label
    coding_dist_true2inf = T.max(coding_dist_true2inf, axis=1)

    # Calculation: prediction to false_label
    y_pre2false = T.sqrt(T.power(coding_dist_true2inf - 1, 2))

    loss = 1 + y_pre2true - y_pre2false
    return loss, coding_dist_true2inf, true_pre, loss
def leaky_beta_asymmetric_fixation_1(o, t, o2, f1, f2, v, alpha, beta, d, omega, tau_p, tau_n, gamma):
    """
    Forgetful beta model with asymmetric updating and fixation weighting of learning
    --------------------------------------------------------------------------------

    Updates driven by this stimulus' outcome are scaled by
    ``f1 * gamma + (1 - gamma)``: with gamma = 0 the weight is 1, with
    gamma = 1 the weight equals the fixation proportion f1. The contribution
    of the other stimulus' outcome is scaled by ``omega * f2``.

    NOTE(review): an earlier description of this function spoke of weighting
    the most attended stimulus by 1 and the least attended by one minus the
    gamma-weighted fixation difference; the expression below is a linear
    blend in f1 instead. Confirm which behaviour is intended.

    Args:
        o: Trial outcome
        t: Time (not used)
        o2: Outcome of the other stimulus
        f1: Fixation duration proportion for this stimulus
        f2: Fixation duration proportion for the other stimulus
        v: Previous trial value estimate (not used)
        alpha: Starting alpha
        beta: Starting beta
        d: Decay (forgetting) rate
        omega: Weight of the other stimulus outcome
        tau_p: Positive update weight
        tau_n: Negative update weight
        gamma: Influence of fixation weighting on updates

    Returns:
        Mean: Estimated probability on the current trial (mean of beta distribution)
        Alpha: Alpha value on current trial
        Beta: Beta value on current trial
        Var: Variance of beta distribution
    """
    f_weight = (f1 * gamma) + (1 - gamma)
    retain = 1 - d
    floor = T.power(0.1, 10)

    alpha = T.maximum(floor, retain * alpha + (o * tau_p * f_weight) + (omega * f2 * o2))
    beta = T.maximum(floor, retain * beta + ((1 - o) * tau_n * f_weight) + (omega * f2 * (1 - o2)))

    total = alpha + beta
    value = alpha / total
    var = (alpha * beta) / (T.pow(total, 2) * (total + 1))
    return (value, alpha, beta, var)
def eucl_dist(X, Y):
    """Compute half the squared Euclidean distance between X and Y

    The value is ``sum(0.5 * (X**2 + Y**2 - 2 * X * Y))`` over all elements.

    Parameters
    ----------
    X : Theano tensor
    Y : Theano tensor

    Returns
    -------
    out : Theano scalar
        Euclidean distance term"""
    squares = T.power(X, 2) + T.power(Y, 2)
    cross = 2 * T.mul(X, Y)
    return T.sum((1.0 / 2) * (squares - cross))
def __init__(self, rng, input, AELayerSizes, classifyLayerSizes):
    """Build a mirrored auto-encoder with a classifier on its middle layer.

    :param rng: random generator forwarded to every ``HiddenLayer``
    :param input: symbolic input of the auto-encoder
    :param AELayerSizes: encoder layer sizes; the decoder mirrors them
    :param classifyLayerSizes: sizes of the classifier layers stacked on the
        output of the middle auto-encoder layer; the last one is a
        ``LogisticRegression`` layer when there is more than one
    """
    self.input = input
    self.label = T.ivector('label')
    self.params = []
    self.AEparams = []
    self.params_inc = []

    # Encoder sizes followed by their mirror image (middle size not repeated).
    self.AELayerSizes = AELayerSizes + AELayerSizes[::-1][1:]
    self.AELayerNum = len(self.AELayerSizes)
    # Index 0 holds the raw input so that layer i reads from AELayers[i-1].
    self.AELayers = [input]
    for i in range(1, self.AELayerNum):
        if i == 1:
            self.AELayers.append(HiddenLayer(rng, self.input, self.AELayerSizes[0], self.AELayerSizes[1]))
        elif i != self.AELayerNum - 1:
            self.AELayers.append(HiddenLayer(rng, self.AELayers[i-1].output,
                                             self.AELayerSizes[i-1], self.AELayerSizes[i]))
        else:
            # last layer: linear output
            self.AELayers.append(HiddenLayer(rng, self.AELayers[i-1].output,
                                             self.AELayerSizes[i-1], self.AELayerSizes[i],
                                             activation=None))
        self.params += self.AELayers[i].params
        self.AEparams += self.AELayers[i].params
        self.params_inc += self.AELayers[i].params_inc

    self.classifyLayerSizes = classifyLayerSizes
    self.classifyLayerNum = len(self.classifyLayerSizes)
    self.classifyLayers = []
    for i in range(self.classifyLayerNum):
        if i == 0:
            # The classifier branches off the middle auto-encoder layer.
            mid_layer = len(AELayerSizes) - 1
            last_input = self.AELayers[mid_layer].output
            self.classifyLayers.append(HiddenLayer(rng, last_input, AELayerSizes[-1],
                                                   self.classifyLayerSizes[i]))
        else:
            last_input = self.classifyLayers[i-1].output
            if i != self.classifyLayerNum - 1:
                self.classifyLayers.append(HiddenLayer(rng, last_input,
                                                       self.classifyLayerSizes[i-1],
                                                       self.classifyLayerSizes[i]))
            else:
                self.classifyLayers.append(LogisticRegression(last_input,
                                                              self.classifyLayerSizes[i-1],
                                                              self.classifyLayerSizes[i]))
        self.params += self.classifyLayers[i].params
        self.params_inc += self.classifyLayers[i].params_inc

    self.loss_NLL = (self.classifyLayers[-1].negative_log_likelihood)
    self.loss_L2rec = T.mean(T.sum(T.power((self.input - self.AELayers[-1].output), 2), axis=1))
    # Squared distance between each layer and its mirror counterpart.
    self.loss_L2M = []
    # Floor division keeps the bound an int on Python 3 (same value on Python 2).
    for i in range(1, self.AELayerNum // 2):
        self.loss_L2M.append(T.mean(T.sum(
            T.power((self.AELayers[i].output - self.AELayers[-i-1].output), 2), axis=1)))

    self.errors = self.classifyLayers[-1].errors
def bmt_update_dual(outcome, trial_type, V_loss, V_gain, var_loss, var_gain, kGain_loss, kGain_gain, theta):
    """Update separate loss/gain value estimates depending on the trial type.

    On trials with ``trial_type == -1`` the loss track is updated, on trials
    with ``trial_type == 1`` the gain track; the other track is passed
    through unchanged. For the updated track the gain is
    ``var / (var + theta ** 2)``, the value moves toward ``outcome`` by that
    gain, and the variance is multiplied by ``1 - gain``.

    With help from
    https://github.com/charleywu/cognitivemaps/blob/6570746510f0b27043bc97a01af65da2d3f88c44/models.R

    Returns:
        V_loss, V_gain, var_loss, var_gain, kGain_loss, kGain_gain
    """
    is_loss = T.eq(trial_type, -1)
    is_gain = T.eq(trial_type, 1)
    noise = T.power(theta, 2)

    # Loss track
    kGain_loss = T.switch(is_loss, var_loss / (var_loss + noise), kGain_loss)
    V_loss = T.switch(is_loss, V_loss + kGain_loss * (outcome - V_loss), V_loss)
    var_loss = T.switch(is_loss, var_loss * (1 - kGain_loss), var_loss)

    # Gain track
    kGain_gain = T.switch(is_gain, var_gain / (var_gain + noise), kGain_gain)
    V_gain = T.switch(is_gain, V_gain + kGain_gain * (outcome - V_gain), V_gain)
    var_gain = T.switch(is_gain, var_gain * (1 - kGain_gain), var_gain)

    return V_loss, V_gain, var_loss, var_gain, kGain_loss, kGain_gain
def beta_div(X, W, H, beta):
    """Compute beta divergence D(X|WH)

    The reconstruction is built as ``T.dot(H, W)``. The cases beta == 2,
    beta == 0 and beta == 1 use dedicated closed forms; every other value
    uses the generic expression.
    """
    approx = T.dot(H, W)
    # Generic case: the cross term is X * approx**(beta - 1). Raising the
    # product X * approx to (beta - 1) would make D(X|X) non-zero.
    generic = T.sum(1. / (beta * (beta - 1.)) *
                    (T.power(X, beta) +
                     (beta - 1.) * T.power(approx, beta) -
                     beta * T.mul(X, T.power(approx, (beta - 1)))))
    div = ifelse(T.eq(beta, 2),
                 T.sum(1. / 2 * T.power(X - approx, 2)),
                 ifelse(T.eq(beta, 0),
                        T.sum(X / approx - T.log(X / approx) - 1),
                        ifelse(T.eq(beta, 1),
                               T.sum(T.mul(X, (T.log(X) - T.log(approx))) + approx - X),
                               generic)))
    return div
def leaky_beta_asymmetric_fixation_2(o, t, o2, f1, f2, v, alpha, beta, d, omega, tau_p, tau_n, theta):
    """
    Forgetful beta model with asymmetric updating and fixation weighting of value
    --------------------------------------------------------------------------------

    Alpha receives an additive bonus of ``f1 * theta`` on every update, i.e.
    proportional to this stimulus' fixation proportion. The contribution of
    the other stimulus' outcome is scaled by ``omega * f2``.

    NOTE(review): an earlier description of this function spoke of a bonus
    based on the difference between the most and least attended fixation
    proportions; the expression below uses f1 alone. Confirm which behaviour
    is intended.

    Args:
        o: Trial outcome
        t: Time (not used)
        o2: Outcome of the other stimulus
        f1: Fixation duration proportion for this stimulus
        f2: Fixation duration proportion for the other stimulus
        v: Previous trial value estimate (not used)
        alpha: Starting alpha
        beta: Starting beta
        d: Decay (forgetting) rate
        omega: Weight of the other stimulus outcome
        tau_p: Positive update weight
        tau_n: Negative update weight
        theta: Weighting on fixation-dependent bonus to alpha

    Returns:
        Mean: Estimated probability on the current trial (mean of beta distribution)
        Alpha: Alpha value on current trial
        Beta: Beta value on current trial
        Var: Variance of beta distribution
    """
    retain = 1 - d
    floor = T.power(0.1, 10)

    alpha = T.maximum(floor, retain * alpha + (o * tau_p) + (omega * f2 * o2) + f1 * theta)
    beta = T.maximum(floor, retain * beta + ((1 - o) * tau_n) + (omega * f2 * (1 - o2)))

    total = alpha + beta
    value = alpha / total
    var = (alpha * beta) / (T.pow(total, 2) * (total + 1))
    return (value, alpha, beta, var)
def __init__(self, rng, layerSizes):
    """Stack ``AELayer`` objects and build the full reconstruction loss.

    :param rng: random generator forwarded to every ``AELayer``
    :param layerSizes: layer widths; ``len(layerSizes) - 1`` layers are built
        and at least one is required
    """
    self.AELayers = []
    self.ups = []
    self.downs = []
    self.params = []
    self.layerSizes = layerSizes
    self.n_layers = len(layerSizes) - 1
    assert self.n_layers > 0

    self.input = T.matrix('AE_Input')
    self.ups.append(self.input)

    # Encoding pass: each layer reads the latest entry of `ups`.
    for depth in range(self.n_layers):
        # The first layer reconstructs without a down activation.
        extra = {'down_activation': None} if depth == 0 else {}
        layer = AELayer(rng, self.ups[depth],
                        self.layerSizes[depth], self.layerSizes[depth + 1], **extra)
        self.AELayers.append(layer)
        self.params += (layer.params)
        self.ups.append(layer.get_hidden(self.ups[depth]))

    # Decoding pass: start from the deepest code and undo the layers in reverse.
    self.downs.append(self.ups[-1])
    for layer in reversed(self.AELayers):
        self.downs.append(layer.get_reconstruction(self.downs[-1]))

    self.loss_rec = T.mean(T.sum(T.power((self.input - self.downs[-1]), 2), axis=1))
def fit(self, X, y=None):
    """Build the convolution train/predict functions and run one train step.

    Allocates a zero weight tensor, compiles ``self.conv_fct['train']`` and
    ``self.conv_fct['predict']``, and returns the result of calling the
    train function on ``(X, y, 1)``.

    NOTE(review): ``y`` defaults to None but ``y.shape`` is read on the first
    line, so a target must be supplied in practice.
    """
    self.n_features = y.shape[0]
    # Weights start at zero; shape is (n_features, X.shape[1], spatial[0], spatial[1]).
    self.weights['input'] = theano.shared(value=np.zeros((
        self.n_features,
        X.shape[1],
        self.spatial[0],
        self.spatial[1]),
        dtype=theano.config.floatX), name='w', borrow=True)
    input = T.tensor4(name='input')
    target = T.tensor4(name='target')
    decay = T.scalar(name='decay')

    # Correlate input and target after swapping their first two axes.
    xy = T.nnet.conv2d(input.transpose(1,0,2,3), target.transpose(1,0,2,3),
                       border_mode=self.pad, subsample=self.stride)
    # Sum of squared inputs over every axis except axis 1.
    xx = T.sum(T.power(input, 2), axis=(0,2,3))
    # NOTE(review): this call passes only a condition to `ifelse` and `k` is
    # never used afterwards; the statement looks unfinished — confirm intent.
    k = ifelse(self.hidden_matrices['input'] is None, )

    lam = theano.shared(value=self._C, name='constrain', borrow=True)
    prediction = T.nnet.conv2d(input, self.weights['input'],
                               border_mode=self.pad, subsample=self.stride)

    # Element of A (first two axes swapped) divided by (matching K entry + constraint).
    weights, _ = theano.scan(
        fn=lambda a, k, c: a/(k+c),
        outputs_info=None,
        sequences=[self.hidden_matrices['A'].transpose(1,0,2,3),
                   self.hidden_matrices['K']],
        non_sequences=lam)
    new_weights = weights.transpose(1,0,2,3)
    # Accumulators are combined with `decay` via `.dot` before the new statistics are added.
    updates = [(self.hidden_matrices['K'],
                self.hidden_matrices['K'].dot(decay)+xx),
               (self.hidden_matrices['A'],
                self.hidden_matrices['A'].dot(decay) + xy),
               (self.weights['input'], new_weights)]
    self.conv_fct['train'] = theano.function([input, target, decay],
                                             prediction,
                                             updates=updates)
    self.conv_fct['predict'] = theano.function([input], prediction)
    return self.conv_fct['train'](X, y, 1)
def _policy_function(self):
    """Compile a function returning the learning-rate decay factor.

    The compiled function takes ``(gm, epoch, powr, step)`` in that order;
    inputs not needed by the selected policy are ignored.

    Policies selected by ``self.lr_policy``:
        'inv'   -> ``(1 + gm * epoch) ** -powr``
        'exp'   -> ``gm ** epoch``
        'step'  -> ``gm ** (epoch // step)`` when ``epoch`` is a multiple of
                   ``step``, otherwise 1
        'fixed' -> constant 1

    Raises:
        ValueError: if ``self.lr_policy`` is not one of the names above.
    """
    epoch, gm, powr, step = T.scalars('epoch', 'gm', 'powr', 'step')
    if self.lr_policy == 'inv':
        decay = T.power(1.0 + gm * epoch, -powr)
    elif self.lr_policy == 'exp':
        decay = gm**epoch
    elif self.lr_policy == 'step':
        decay = T.switch(T.eq(T.mod_check(epoch, step), 0.0),
                         T.power(gm, T.floor_div(epoch, step)),
                         1.0)
    elif self.lr_policy == 'fixed':
        decay = T.constant(1.0, name='fixed', dtype=theano.config.floatX)
    else:
        # Fail with a clear message instead of an unbound `decay` below.
        raise ValueError("Unknown lr_policy: {!r}".format(self.lr_policy))

    return theano.function([gm, epoch, powr, step], decay,
                           allow_input_downcast=True,
                           on_unused_input='ignore')
def relative_position_embedding_calc(prefix, params, layer_setting, state_below, mask_below):
    """Compute relative-position embeddings for ``state_below``.

    With ``layer_setting['calc_method'] == 'embedding'`` the positions are
    rescaled by ``n_in / length`` (capped at ``n_in - 1``) and used as
    indices into the table ``params[join(prefix, '_rel_emb')]``. Otherwise
    the positions are divided by the length, multiplied by a set of scales
    derived from ``layer_setting['threshold']`` and passed through sine and
    cosine. The length is the sum of ``mask_below`` along its first axis.

    Returns a tensor reshaped to ``[n_steps, n_samples, n_dim]``, where the
    first two come from ``state_below.shape`` and ``n_dim`` is
    ``layer_setting['n_out']``.
    """
    n_steps = state_below.shape[0]
    n_samples = state_below.shape[1]
    n_dim = layer_setting['n_out']
    n_in = layer_setting['n_in']

    if layer_setting['calc_method'] == 'embedding':
        len_below = T.cast(T.sum(mask_below.dimshuffle(1, 0), axis=1), dtype='int64')
        state_below = T.minimum(n_in - 1, state_below * n_in / len_below[None, :])
        result = params[join(prefix, '_rel_emb')][state_below.flatten()].reshape(
            [n_steps, n_samples, n_dim])
    else:
        threshold = theano.shared(layer_setting['threshold'], dtype='float32')
        scale = 2 * T.arange(n_dim / 2, dtype='float32') / n_dim
        scale = T.power(threshold, scale)
        len_below = T.sum(mask_below.dimshuffle(1, 0), axis=1)
        posi = (state_below / len_below[None, :]).flatten()
        value = posi[:, None] * scale[None, :]
        result = T.concatenate([T.sin(value), T.cos(value)], axis=1)
        # `n_steps` was misspelled here, which raised NameError on this branch.
        result = T.reshape(result, [n_steps, n_samples, n_dim])

    return result
def st_d_logp(x, mu, nu, sigma2):
    """Log-density of a Student-t with location ``mu``, ``nu`` degrees of
    freedom and squared scale ``sigma2``, evaluated at ``x``.

    The density is formed first and the logarithm is taken at the end.
    """
    x_p = (x - mu) / T.sqrt(sigma2)
    normaliser = T.gamma((nu + 1.0) / 2.0) / (T.gamma(nu / 2.0) * T.sqrt(pi * nu * sigma2))
    kernel = T.power(1.0 + x_p**2 / nu, -(nu + 1) / 2.0)
    return T.log(normaliser * kernel)
def likelihood(xs):
    """Summed log-likelihood of ``xs``.

    Uses ``beta``, ``std`` and ``mu`` from the enclosing scope. The absolute
    deviation from ``mu`` is scaled by
    ``sqrt(gamma(1/beta) / gamma(3/beta)) / std`` and raised to ``beta``.
    """
    scale = T.sqrt(T.gamma(1. / beta) / T.gamma(3. / beta))
    log_norm = T.log(beta) - T.log(2.0 * std * scale) - T.gammaln(1.0 / beta)
    tail = T.power(T.abs_(xs - mu) / std * scale, beta)
    return T.sum(log_norm + -tail)
def focal_loss(self, y, r=2):
    """Mean loss in which ``self.input`` is weighted by ``(1 - exp_x) ** r``.

    The weighted input is multiplied by ``-y`` and summed over axis 2, then
    ``log(self.sum_exp_x)`` is added before averaging.
    """
    complement = tensor.ones_like(self.exp_x) - self.exp_x
    weighted_input = tensor.power(complement, r) * self.input
    per_item = tensor.sum(-y * weighted_input, axis=2, keepdims=True)
    return tensor.mean(per_item + tensor.log(self.sum_exp_x))
def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars):
    """Return ``((new_prob + TINY) / (old_prob + TINY)) ** x_var`` element-wise.

    The probabilities are read from the ``"prob"`` entry of each dist-info
    dict.
    """
    old_prob_var = old_dist_info_vars["prob"]
    new_prob_var = new_dist_info_vars["prob"]
    # Assume layout is N * A
    rows = TT.arange(old_prob_var.shape[0])
    ratio = (new_prob_var[rows] + TINY) / (old_prob_var[rows] + TINY)
    return TT.power(ratio, x_var)
def likelihood(xs):
    """Summed log-likelihood of ``xs``.

    Uses ``beta``, ``std`` and ``mu`` from the enclosing scope. The absolute
    deviation from ``mu`` is scaled by
    ``sqrt(gamma(1/beta) / gamma(3/beta)) / std`` and raised to ``beta``.
    """
    scale = tt.sqrt(tt.gamma(1. / beta) / tt.gamma(3. / beta))
    log_norm = tt.log(beta) - tt.log(2.0 * std * scale) - tt.gammaln(1.0 / beta)
    tail = tt.power(tt.abs_(xs - mu) / std * scale, beta)
    return tt.sum(log_norm + -tail)
def focal_loss2(self, y, r=2):
    """Mean loss in which ``log(exp_x / sum_exp_x)`` is weighted by ``(1 - exp_x) ** r``.

    The weighted log term is multiplied by ``-y``, summed over axis 2 and
    averaged.
    """
    log_x = tensor.log(self.exp_x / self.sum_exp_x)
    complement = tensor.ones_like(self.exp_x) - self.exp_x
    weighted_log = tensor.power(complement, r) * log_x
    per_item = tensor.sum(-y * weighted_log, axis=2, keepdims=True)
    return tensor.mean(per_item)
def _policy_function(self):
    """Compile a function returning the learning-rate decay factor.

    The compiled function takes ``(gm, epoch, powr, step)`` in that order;
    inputs not needed by the selected policy are ignored.

    Policies selected by ``self.lr_policy``:
        'inv'   -> ``(1 + gm * epoch) ** -powr``
        'exp'   -> ``gm ** epoch``
        'step'  -> ``gm ** (epoch // step)`` when ``epoch`` is a multiple of
                   ``step``, otherwise 1
        'fixed' -> constant 1

    Raises:
        ValueError: if ``self.lr_policy`` is not one of the names above.
    """
    epoch, gm, powr, step = T.scalars('epoch', 'gm', 'powr', 'step')
    if self.lr_policy == 'inv':
        decay = T.power(1.0+gm*epoch, -powr)
    elif self.lr_policy == 'exp':
        decay = gm ** epoch
    elif self.lr_policy == 'step':
        decay = T.switch(T.eq(T.mod_check(epoch, step), 0.0),
                         T.power(gm, T.floor_div(epoch, step)),
                         1.0)
    elif self.lr_policy == 'fixed':
        decay = T.constant(1.0, name='fixed', dtype=theano.config.floatX)
    else:
        # Fail with a clear message instead of an unbound `decay` below.
        raise ValueError("Unknown lr_policy: {!r}".format(self.lr_policy))

    return theano.function([gm, epoch, powr, step], decay,
                           allow_input_downcast=True,
                           on_unused_input='ignore')
def leaky_beta_asymmetric_fixation_1b(o, t, o2, f1, f2, v, alpha, beta, d, omega, tau_p, tau_n, gamma):
    """
    Forgetful beta model with asymmetric updating and thresholded fixation weighting of learning
    -------------------------------------------------------------------------------------------

    Updates driven by this stimulus' outcome are scaled by 1 when
    ``f1 >= f2`` and by ``1 - (f2 - f1) * gamma`` otherwise. E.g. with
    f1 = 0.3, f2 = 0.7 the weight is 0.6 for gamma = 1 and 0.8 for
    gamma = 0.5. The contribution of the other stimulus' outcome is scaled
    by ``omega * f2``.

    NOTE(review): an earlier description called this the "continuous"
    variant; the expression below only down-weights the less fixated
    stimulus. Confirm which behaviour is intended.

    Args:
        o: Trial outcome
        t: Time (not used)
        o2: Outcome of the other stimulus
        f1: Fixation duration proportion for this stimulus
        f2: Fixation duration proportion for the other stimulus
        v: Previous trial value estimate (not used)
        alpha: Starting alpha
        beta: Starting beta
        d: Decay (forgetting) rate
        omega: Weight of the other stimulus outcome
        tau_p: Positive update weight
        tau_n: Negative update weight
        gamma: Influence of fixation weighting on updates

    Returns:
        Mean: Estimated probability on the current trial (mean of beta distribution)
        Alpha: Alpha value on current trial
        Beta: Beta value on current trial
        Var: Variance of beta distribution
    """
    f_weight = T.switch(T.ge(f1, f2), 1, 1 - (f2 - f1) * gamma)
    retain = 1 - d
    floor = T.power(0.1, 10)

    alpha = T.maximum(floor, retain * alpha + (o * tau_p * f_weight) + (omega * f2 * o2))
    beta = T.maximum(floor, retain * beta + ((1 - o) * tau_n * f_weight) + (omega * f2 * (1 - o2)))

    total = alpha + beta
    value = alpha / total
    var = (alpha * beta) / (T.pow(total, 2) * (total + 1))
    return (value, alpha, beta, var)
def leaky_beta_asymmetric_fixation_2b(o, t, o2, f1, f2, v, alpha, beta, d, omega, tau_p, tau_n, theta):
    """
    Forgetful beta model with asymmetric updating and thresholded fixation weighting of value
    ----------------------------------------------------------------------------------------

    Alpha receives an additive bonus of ``max(0, f1 - f2) * theta``: only
    the more fixated stimulus is boosted, by the fixation difference. E.g.
    with f1 = 0.7, f2 = 0.3 the bonus is 0.4 for theta = 1 and 0.2 for
    theta = 0.5. The contribution of the other stimulus' outcome is scaled
    by ``omega * f2``.

    NOTE(review): an earlier description called this the "continuous"
    variant; the expression below is zero whenever f1 <= f2. Confirm which
    behaviour is intended.

    Args:
        o: Trial outcome
        t: Time (not used)
        o2: Outcome of the other stimulus
        f1: Fixation duration proportion for this stimulus
        f2: Fixation duration proportion for the other stimulus
        v: Previous trial value estimate (not used)
        alpha: Starting alpha
        beta: Starting beta
        d: Decay (forgetting) rate
        omega: Weight of the other stimulus outcome
        tau_p: Positive update weight
        tau_n: Negative update weight
        theta: Weighting on fixation-dependent bonus to alpha

    Returns:
        Mean: Estimated probability on the current trial (mean of beta distribution)
        Alpha: Alpha value on current trial
        Beta: Beta value on current trial
        Var: Variance of beta distribution
    """
    retain = 1 - d
    floor = T.power(0.1, 10)
    bonus = T.largest(0, f1 - f2) * theta

    alpha = T.maximum(floor, retain * alpha + (o * tau_p) + (omega * f2 * o2) + bonus)
    beta = T.maximum(floor, retain * beta + ((1 - o) * tau_n) + (omega * f2 * (1 - o2)))

    total = alpha + beta
    value = alpha / total
    var = (alpha * beta) / (T.pow(total, 2) * (total + 1))
    return (value, alpha, beta, var)
def _ppf(self, p):
    """
    The percentile point function (the inverse of the cumulative
    distribution function) of the discrete Weibull distribution.

    Computed as ``ceil((log(1 - p) / log(q)) ** (1 / beta)) - 1`` and cast
    to int64.
    """
    q, beta = self.q, self.beta
    log_ratio = tt.log(1 - p) / tt.log(q)
    quantile = tt.ceil(tt.power(log_ratio, 1. / beta)) - 1
    return quantile.astype('int64')
def policy_update(self, lr_policy):
    """Compile a function that decays ``self.shared_lr`` in place.

    The compiled function takes ``(gm, epoch, powr, step)`` in that order,
    returns the decay factor and multiplies ``self.shared_lr`` by it as an
    update. Inputs not needed by the selected policy are ignored.

    Policies selected by ``lr_policy``:
        'inv'   -> ``(1 + gm * epoch) ** -powr``
        'exp'   -> ``gm ** epoch``
        'step'  -> ``gm ** (epoch // step)`` when ``epoch`` is a multiple of
                   ``step``, otherwise 1
        'fixed' -> constant 1

    Raises:
        ValueError: if ``lr_policy`` is not one of the names above.
    """
    epoch, gm, powr, step = T.scalars('epoch', 'gm', 'powr', 'step')
    if lr_policy == 'inv':
        decay = T.power(1+gm*epoch, -powr)
    elif lr_policy == 'exp':
        decay = gm ** epoch
    elif lr_policy == 'step':
        decay = T.switch(T.eq(T.mod_check(epoch, step), 0),
                         T.power(gm, T.floor_div(epoch, step)),
                         1.0)
    elif lr_policy == 'fixed':
        decay = T.constant(1.0, name='fixed', dtype='float32')
    else:
        # Fail with a clear message instead of an unbound `decay` below.
        raise ValueError("Unknown lr_policy: {!r}".format(lr_policy))

    return theano.function([gm, epoch, powr, step], decay,
                           updates=[(self.shared_lr, self.shared_lr * decay)],
                           on_unused_input='ignore')
def _ppf(self, p):
    r"""
    The percentile point function (the inverse of the cumulative
    distribution function) of the discrete Weibull distribution.

    Computed as ``ceil((log(1 - p) / log(q)) ** (1 / beta)) - 1`` and cast
    to int64.
    """
    q, beta = self.q, self.beta
    log_ratio = tt.log(1 - p) / tt.log(q)
    quantile = tt.ceil(tt.power(log_ratio, 1.0 / beta)) - 1
    return quantile.astype("int64")
def generate_encoder_inflate_pretraining_problem(
        ae: MechanisticAutoEncoder, pretrained_inputs: pd.DataFrame,
        pars: pd.DataFrame) -> Problem:
    """
    Creates a pypesto problem that can be used to train encoder and inflate
    parameters. This is done based on the precomputed input parameters that
    were generated during cross sample pretraining. This function defines a
    least squares problem ||W_p*W*x - p||, where `W` is the encoder matrix,
    `W_p` is the inflate matrix, x is the input data and p are the pretrained
    input parameters. Optimization is performed over variables `W_p` and `W`.

    :param ae:
        Mechanistic autoencoder that will be pretrained
    :param pretrained_inputs:
        pretrained input parameters computed by performing cross sample
        pretraining
    :param pars:
        corresponding population input parameters that were pretrained along
        with the pretrained inputs. This input does not affect the solution,
        but will be stored as fixed parameters in the result such that it is
        available in later pretraining steps

    :returns:
        pypesto Problem
    """
    least_squares = .5 * tt.sum(tt.power(
        ae.encode_params(ae.encoder_pars) -
        pretrained_inputs[ae.sample_names].values.T, 2
    )[:])

    loss = theano.function([ae.encoder_pars], least_squares)
    loss_grad = theano.function(
        [ae.encoder_pars], theano.grad(least_squares, [ae.encoder_pars])
    )

    # Only the encoder parameters are optimised; the rest are kept fixed.
    encoder_names = ae.x_names[:ae.n_encoder_pars]

    return Problem(
        objective=Objective(
            # The builtin float replaces the `np.float` alias, which newer
            # NumPy releases no longer provide.
            fun=lambda x: float(loss(x[:ae.n_encoder_pars])),
            grad=lambda x: loss_grad(x[:ae.n_encoder_pars])[0]
        ),
        ub=[np.inf for _ in encoder_names],
        lb=[-np.inf for _ in encoder_names],
        lb_init=[parameter_boundaries_scales[name.split('_')[-1]][0]
                 for name in encoder_names],
        ub_init=[parameter_boundaries_scales[name.split('_')[-1]][1]
                 for name in encoder_names],
        x_names=ae.x_names[:ae.n_encoder_pars] + list(pars.index),
        x_fixed_indices=list(range(ae.n_encoder_pars,
                                   ae.n_encoder_pars + ae.n_kin_params)),
        dim_full=ae.n_encoder_pars + ae.n_kin_params,
        x_fixed_vals=pars.values
    )
def r2(true, predicted):
    """Coefficient of determination between ``true`` and ``predicted``.

    Returns 1 when the total sum of squares of ``true`` is zero, otherwise
    ``1 - ssr / sst``.

    Raises:
        AttributeError: if the shape comparison reports a mismatch. The
            message includes both shapes.

    NOTE(review): ``T.eq`` builds a symbolic expression, so whether the
    ``not`` test below ever reflects the runtime shapes should be confirmed.
    """
    if not T.eq(true.shape, predicted.shape):
        # Raise directly: wrapping the raise in try/bare-except used to
        # replace this message with a version that omitted the shapes.
        raise AttributeError(
            "True and predicted arrays should have the same shape, current shapes: True = {0},"
            " predicted = {1}".format(true.shape, predicted.shape))

    sst = T.power(true - true.mean(), 2).sum()
    ssr = T.power(true - predicted, 2).sum()
    # Guard against division by zero when `true` is constant.
    return T.switch(T.eq(sst, 0), 1, 1 - ssr / sst)
def _integrand_constant(self, z, Om):
    """
    Evaluate ``(Om * (1 + z)**3 + Ode) ** -0.5`` with
    ``Ode = 1 - Om - self._Or``.

    :param z: redshift
    :param Om: matter content
    :return: theano array of 1/H(z)
    """
    zp = (1 + z)
    Ode = 1 - Om - self._Or  # Adjust cosmological constant
    hubble_sq = T.pow(zp, 3) * Om + Ode
    return T.power(hubble_sq, -0.5)
def multivariate_normal_bulk_chol(
        bulk_weights, hps, slog_pdets, residuals, hp_specific=False):
    """Return ``-0.5 * (slog_pdets + M * (2 * hps + log(2 * pi)) + exp(-2 * hps) * ssq)``.

    ``M`` is ``residuals.shape[1]`` and ``ssq`` is the sum over axis 1 of
    the squared batched product of ``bulk_weights`` and ``residuals``.
    ``hp_specific`` is accepted but not used.
    """
    M = residuals.shape[1]
    weighted = tt.batched_dot(bulk_weights, residuals)
    sum_squares = tt.power(weighted, 2).sum(1)
    log_norm = (M * (2 * hps + num.log(2 * num.pi)))
    inverse_scale = (1 / tt.exp(hps * 2))
    return (-0.5) * (slog_pdets + log_norm + inverse_scale * (sum_squares))
def integrand_constant_flat(z, Om):
    """
    Evaluate ``(Om * (1 + z)**3 + Ode) ** -0.5`` with ``Ode = 1 - Om - Or``,
    where ``Or`` is read from module scope.

    :param z: redshift
    :param Om: matter content
    :return: theano array of 1/H(z)
    """
    zp = 1 + z
    Ode = 1 - Om - Or  # Adjust cosmological constant
    hubble_sq = T.pow(zp, 3) * Om + Ode
    return T.power(hubble_sq, -0.5)
def integrand_w_flat(z, Om, w):
    """
    Evaluate ``(zp**3 * (Or * zp + Om) + Ode * zp**(3 * (1 + w))) ** -0.5``
    with ``zp = 1 + z`` and ``Ode = 1 - Om - Or``; ``Or`` is read from
    module scope.

    :param z: redshift
    :param Om: matter content
    :param w: DE EOS
    :return: theano array of 1/H(z)
    """
    zp = 1 + z
    Ode = 1 - Om - Or  # Adjust cosmological constant
    matter_radiation = T.pow(zp, 3) * (Or * zp + Om)
    dark_energy = Ode * T.pow(zp, 3.0 * (1 + w))
    return T.power((matter_radiation + dark_energy), -0.5)
def get_rbfnet_predict_function(metric_name):
    """Compile the prediction function of an RBF network.

    The metric ``metric_theano[metric_name]`` is applied to ``(X, W)``, the
    result is turned into ``exp(-b * H ** 2)``, and the sign of its product
    with ``beta`` is returned.

    :param metric_name: key into ``metric_theano``
    :return: compiled function taking ``(X_matrix, W_matrix, beta, b)``
    """
    X_matrix = T.dmatrix('X')
    W_matrix = T.dmatrix('W')
    beta = T.dvector('beta')
    b = T.scalar('b')

    H_matrix = metric_theano[metric_name](X_matrix, W_matrix)
    # Use the symbolic exponential: the operand is a graph node, not an array.
    H_rbf = T.exp(T.power(H_matrix, 2) * (-b))
    s = T.sgn(T.dot(H_rbf, beta))

    rbfnet_predict_function = theano.function([X_matrix, W_matrix, beta, b], s)
    return rbfnet_predict_function
def integrand_constant_curve(z, Om, Ok):
    """
    Evaluate ``(zp**2 * ((Or * zp + Om) * zp + Ok) + Ode) ** -0.5`` with
    ``zp = 1 + z`` and ``Ode = 1 - Om - Or - Ok``; ``Or`` is read from
    module scope.

    :param z: redshift
    :param Om: matter content
    :param Ok: curvature
    :return: theano array of 1/H(z)
    """
    zp = 1 + z
    Ode = 1 - Om - Or - Ok
    # Nested (Horner-style) form of the polynomial in zp.
    polynomial = zp * zp * ((Or * zp + Om) * zp + Ok)
    return T.power(polynomial + Ode, -0.5)
def beta_W(X, W, H, beta):
    """Multiplicative update of the bases with beta divergence.

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    H : Theano tensor
        activation matrix
    beta : Theano scalar
        exponent of the beta divergence

    Returns
    -------
    W : Theano tensor
        Updated version of the bases
    """
    approx = T.dot(H, W.T)
    euclidean = (T.dot(X.T, H)) / (T.dot(approx.T, H))
    generic = ((T.dot(T.mul(T.power(approx, (beta - 2)), X).T, H)) /
               (T.dot(T.power(approx, (beta - 1)).T, H)))
    return T.mul(W, ifelse(T.eq(beta, 2), euclidean, generic))
def sample_from_prior(self, z):
    """
    Draw a symbolic sample from the latent prior named by ``self.z_prior``.

    ``"gaussian"``: a normal sample with the shape of ``z``.

    ``"uniform"``: a normal sample passed through
    ``get_normalized_vector`` gives a direction, and a per-row radius
    ``u ** (1 / z.shape[1])`` with ``u`` uniform on [0, 1] is drawn; the
    result is ``2.0 * radius * direction``. (Presumably uniform inside a
    ball of radius 2, assuming ``get_normalized_vector`` returns
    unit-length rows -- confirm.)

    :param z: tensor used only for its shape
    :return: symbolic tensor with the shape of ``z``
    :raises NotImplementedError: for any other value of ``self.z_prior``
    """
    prior = self.z_prior

    # Compare by value: ``is`` tests object identity and only matches
    # string literals when the interpreter happens to intern them.
    if prior == "gaussian":
        return 1.0 * self.rng.normal(size=z.shape,
                                     dtype=theano.config.floatX)

    if prior == "uniform":
        v = get_normalized_vector(
            self.rng.normal(size=z.shape, dtype=theano.config.floatX))
        # One radius per row; the keepdims sum is used only for its shape.
        r = T.power(
            self.rng.uniform(size=z.sum(axis=1, keepdims=True).shape,
                             low=0, high=1.0,
                             dtype=theano.config.floatX),
            1.0 / z.shape[1],
        )
        # Mark the second axis broadcastable so r scales every column of v.
        r = T.patternbroadcast(r, [False, True])
        return 2.0 * r * v

    raise NotImplementedError()
def get_rbfnet_learning_func(f_name):
    """
    Compile a Theano function that solves for RBF-network output weights.

    The compiled function takes ``(X, W, C, b, y)``: it evaluates the
    metric registered under ``f_name`` on ``(X, W)``, builds RBF
    activations ``H_rbf = exp(-b * H**2)`` and returns the regularised
    least-squares solution
    ``inv(H_rbf.T . H_rbf + I / C) . (H_rbf.T . y)``.

    :param f_name: key into ``metric_theano``; only ``'euclidean'`` is
        accepted
    :return: compiled ``theano.function`` with inputs ``[X, W, C, b, y]``
    :raises AssertionError: if ``f_name`` is not ``'euclidean'``
    """
    assert f_name == 'euclidean'

    X_matrix = T.dmatrix('X')
    W_matrix = T.dmatrix('W')
    b = T.scalar('b')
    C_scalar = T.scalar('C')
    y_vector = T.dvector('y')

    H_matrix = metric_theano[f_name](X_matrix, W_matrix)
    # Use Theano's own exp: calling numpy.exp on a symbolic tensor only
    # works through NumPy's object fallback and is not guaranteed to build
    # a graph node.
    H_rbf = T.exp(T.power(H_matrix, 2) * (-b))

    # Ridge-regularised normal equations; 1 / C weights the identity term.
    gram = T.dot(H_rbf.T, H_rbf) + 1.0 / C_scalar * T.eye(H_rbf.shape[1])
    beta_matrix = T.dot(matrix_inverse(gram), T.dot(H_rbf.T, y_vector).T)

    rbfnet_learning_function = theano.function(
        [X_matrix, W_matrix, C_scalar, b, y_vector], beta_matrix)
    return rbfnet_learning_function
def adamupdate(self, cost, lr=.001, b1=.9, b2=.999):
    """
    Build the list of Adam update pairs for all weights and biases.

    For every layer the weights are handled first and the biases second.
    Each parameter contributes three ``(shared, new_value)`` pairs in the
    order first moment, second moment, parameter. A final pair increments
    ``self.stepnum`` by one.

    :param cost: symbolic cost to differentiate
    :param lr: step size
    :param b1: decay rate of the first-moment estimate
    :param b2: decay rate of the second-moment estimate
    :return: list of ``(shared variable, update expression)`` tuples
    """
    step = self.stepnum
    updates = []

    per_layer = zip(self.W, self.adam_w_m, self.adam_w_v,
                    self.b, self.adam_b_m, self.adam_b_v)
    for weights, m, v, biases, mb, vb in per_layer:
        for param, first, second in ((weights, m, v), (biases, mb, vb)):
            grad = T.grad(cost=cost, wrt=param)

            # Exponential moving averages of the gradient and its square.
            first_new = (b1 * first) + ((1.0 - b1) * grad)
            second_new = (b2 * second) + ((1.0 - b2) * T.power(grad, 2.0))

            # Bias-corrected estimates.
            first_hat = first_new / (1.0 - T.power(b1, step))
            second_hat = second_new / (1.0 - T.power(b2, step))

            delta = lr * first_hat / (1e-7 + T.power(second_hat, .5))
            updates.extend([
                (first, first_new),
                (second, second_new),
                (param, param - delta),
            ])

    updates.append((self.stepnum, self.stepnum + 1.0))
    return updates
def _generate_conv(self):
    """
    Compile the pooling function and store it in ``self.conv``.

    With ``self.pooling == 'squareroot'`` the squared input is sum-pooled
    and the square root of the result is taken. Any other value is passed
    to ``Pool.pool_2d`` as ``mode``. ``self.activation_fct`` is applied to
    the pooled output when it is not ``None``.
    """
    images = T.tensor4(name='input')

    # Settings shared by both pooling variants.
    stride = None if self.stride is None else (self.stride, self.stride)
    pool_args = dict(
        ds=(self.spatial[0], self.spatial[1]),
        ignore_border=self.ignore_border,
        padding=self.pad,
        st=stride,
    )

    if self.pooling == 'squareroot':
        summed_squares = Pool.pool_2d(T.power(images, 2), mode='sum',
                                      **pool_args)
        pooled = T.sqrt(summed_squares)
    else:
        pooled = Pool.pool_2d(images, mode=self.pooling, **pool_args)

    activation = self.activation_fct
    result = pooled if activation is None else activation(pooled)
    self.conv = theano.function([images], result)
def __init__( self, input=None, n_visible=1024, n_hidden=1024, type='BB', #type 0 1 2 represents BBRBM GBRBM BGRBM respectly modelFile=None, MSEType='mean' ): """ RBM constructor. Defines the parameters of the model along with basic operations for inferring hidden from visible (and vice-versa), as well as for performing CD updates. """ self.n_visible = n_visible self.n_hidden = n_hidden self.MSEType = MSEType #set RBM type if type=='BB': self.type = 0 elif type=='GB': self.type = 1 elif type=='BG': self.type = 2 else: print 'RBM type %s error, use any of "BB GB BG" instead'%(type) exit() # create a number generator numpy_rng = numpy.random.RandomState(1234) theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) #initial_W = numpy.asarray(0.1*numpy_rng.randn(n_visible,n_hidden),dtype=theano.config.floatX) initial_W = numpy.asarray( numpy_rng.uniform( low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)), high=4 * numpy.sqrt(6. / (n_hidden + n_visible)), size=(n_visible, n_hidden) ), dtype=theano.config.floatX ) # theano shared variables for weights and biases W = theano.shared(value=initial_W, name='W', borrow=True) # create shared variable for hidden units bias hbias = theano.shared( value=numpy.zeros( n_hidden, dtype=theano.config.floatX ), name='hbias', borrow=True ) # create shared variable for visible units bias vbias = theano.shared( value=numpy.zeros( n_visible, dtype=theano.config.floatX ), name='vbias', borrow=True ) w_inc = theano.shared( value=numpy.zeros( [n_visible, n_hidden], dtype=theano.config.floatX ), name='w_inc', borrow=True ) hbias_inc = theano.shared( value=numpy.zeros( n_hidden, dtype=theano.config.floatX ), name='hbias_inc', borrow=True ) vbias_inc = theano.shared( value=numpy.zeros( n_visible, dtype=theano.config.floatX ), name='vbias_inc', borrow=True ) # initialize input layer for standalone RBM or layer0 of DBN self.input = input if not input: self.input = T.matrix('input') self.W = W self.hbias = hbias self.vbias = vbias self.w_inc = w_inc 
self.hbias_inc = hbias_inc self.vbias_inc = vbias_inc self.theano_rng = theano_rng #default RBM training config self.lr = theano.shared(value=np.array(0.0001,dtype=theano.config.floatX)) self.momentum = theano.shared(value=np.array(0.9,dtype=theano.config.floatX)) self.weightCost = theano.shared(value=np.array(0.0002,dtype=theano.config.floatX)) self.params = [self.W, self.hbias, self.vbias] self.params_inc = [self.w_inc, self.hbias_inc, self.vbias_inc] self.output = self.propup(self.input)[1] self.recover = self.propdown(self.output)[1] if modelFile!=None: self.loadModel(modelFile) #### MSE constrain ################## if self.MSEType=='mode': self.mode = self.Binarization(self.output) self.mode_recover = self.propdown(self.mode)[1] self.MSECost = T.mean(T.sum(T.power((self.input-self.mode_recover),2), axis=1)) elif self.MSEType=='mean': self.MSECost = T.mean(T.sum(T.power((self.input-self.recover),2), axis=1)) else: print 'MSEType wrong: ',self.MSEType exit()
def cube(x):
    """Return ``x`` raised to the third power via ``T.power``."""
    exponent = 3
    return T.power(x, exponent)
def gaussian(x, mu, sigma):
    """
    Gaussian-shaped bump centred on ``mu[0]``.

    Computes ``exp((x - mu[0])**2 / (-2 * sigma)[0])`` divided by
    ``(sigma * sqrt(2 * pi))[0]``. Only the first element of ``mu`` and
    ``sigma`` is used.

    NOTE(review): the exponent divides by ``2 * sigma`` while the
    normalisation uses ``sigma``, so this is not a unit-area density
    whether ``sigma`` is a variance or a standard deviation -- confirm
    whether that is intended.
    """
    centre = mu[0]
    exponent = T.power((x - centre), 2) / (-2 * sigma)[0]
    normaliser = (sigma * T.sqrt(2 * np.pi))[0]
    return T.exp(exponent) / normaliser
def error(self, x, y):
    """
    Half mean squared error of the linear prediction.

    The prediction ``x . self.W + self.b`` is flattened with ``ravel()``
    before ``y`` is subtracted.

    :param x: input tensor
    :param y: target tensor, subtracted from the flattened prediction
    :return: symbolic scalar ``mean(0.5 * residual**2)``
    """
    prediction = (T.dot(x, self.W) + self.b).ravel()
    residual = prediction - y
    return T.mean(0.5 * T.power(residual, 2))