def _build_BPF(self):
    print('start building the Bayesian probabilistic model')
    self.x_u = theano.shared(self.train_u)
    self.x_i = theano.shared(self.train_i)
    self.y_r = theano.shared(self.train_r)
    self.y_r_ui = theano.shared(np.array(self.nn_r_ui))
    assert (len(self.y_r.get_value()) == len(self.y_r_ui.get_value()))
    with pm.Model() as self.bncf:
        # define the prior and likelihood
        b_u = pm.Normal('b_u', 0, sd=1, shape=self.shape[0])
        b_i = pm.Normal('b_i', 0, sd=1, shape=self.shape[1])
        u = pm.Normal('u', 0, sd=1)
        tY = pm.Deterministic(
            'tY',
            tt.add(tt.add(tt.add(b_u[self.x_u], b_i[self.x_i]), self.y_r_ui), u))
        # tY = pm.Deterministic('tY', ((b_u[self.x_u]+b_i[self.x_i])+self.y_r_ui)+u)  # b_u+b_i+u+nn_r_ui
        nY = pm.Deterministic('nY', pm.math.sigmoid(tY))
        # likelihood of observed data
        Y = pm.Bernoulli('Y', nY, observed=self.y_r)  # total_size=self.y_r.get_value().shape[0]
    with self.bncf:
        # inference
        approx = pm.fit(n=1000, method=pm.ADVI())
        self.trace = approx.sample(draws=500)
    with self.bncf:
        # posterior prediction
        ppc = pm.sample_posterior_predictive(self.trace, progressbar=True)
        self.by_r_ui = ppc['Y'].mean(axis=0)
    print('done building the Bayesian probabilistic model')
def mcmc(ll, *frvs):
    full_observations = dict(observations)
    full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, frvs)]))
    loglik = -full_log_likelihood(full_observations)

    proposals = free_RVs_prop
    H = tensor.add(*[tensor.sum(tensor.sqr(p)) for p in proposals]) / 2. + loglik

    # -- this should be an inner loop
    g = []
    g.append(tensor.grad(loglik, frvs))

    proposals = [(p - epsilon * gg[0] / 2.) for p, gg in zip(proposals, g)]
    rvsp = [(rvs + epsilon * rvp) for rvs, rvp in zip(frvs, proposals)]

    full_observations = dict(observations)
    full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, rvsp)]))
    new_loglik = -full_log_likelihood(full_observations)

    gnew = []
    gnew.append(tensor.grad(new_loglik, rvsp))
    proposals = [(p - epsilon * gn[0] / 2.) for p, gn in zip(proposals, gnew)]
    # --

    Hnew = tensor.add(*[tensor.sum(tensor.sqr(p)) for p in proposals]) / 2. + new_loglik

    dH = Hnew - H
    accept = tensor.or_(dH < 0., U < tensor.exp(-dH))

    return [tensor.switch(accept, -new_loglik, ll)] + \
        [tensor.switch(accept, p, f) for p, f in zip(rvsp, frvs)], \
        {}, theano.scan_module.until(accept)
def vgd_kernel_tensor(X0):
    XY = T.batched_dot(X0, X0.transpose(0, 2, 1))
    x2 = T.reshape(T.sum(T.square(X0), axis=2), (X0.shape[0], X0.shape[1], 1))
    X2e = T.repeat(x2, X0.shape[1], axis=2)
    H = T.sub(T.add(X2e, X2e.transpose(0, 2, 1)), 2 * XY)

    V = H.flatten(2)

    # median distance
    h = T.switch(T.eq((V.shape[1] % 2), 0),
                 # if even vector
                 T.mean(T.sort(V)[:, ((V.shape[1] // 2) - 1):((V.shape[1] // 2) + 1)], axis=1),
                 # if odd vector
                 T.sort(V)[:, V.shape[1] // 2])

    h = T.sqrt(0.5 * h / T.log(X0.shape[1].astype('float32') + 1.0))
    # h = T.maximum(h, T.zeros_like(h) + 1e-4)
    # h = h / 2

    Kxy = T.exp(-H / T.tile(h.dimshuffle(0, 'x', 'x'), (1, X0.shape[1], X0.shape[1])) ** 2 / 2.0)
    dxkxy = -T.batched_dot(Kxy, X0)
    sumkxy = T.sum(Kxy, axis=2).dimshuffle(0, 1, 'x')
    dxkxy = T.add(dxkxy, T.mul(X0, sumkxy)) / (T.tile(h.dimshuffle(0, 'x', 'x'), (1, X0.shape[1], X0.shape[2])) ** 2)

    return (Kxy, dxkxy, h)
def vgd_kernel(X0):
    XY = T.dot(X0, X0.transpose())
    x2 = T.reshape(T.sum(T.square(X0), axis=1), (X0.shape[0], 1))
    X2e = T.repeat(x2, X0.shape[0], axis=1)
    H = T.sub(T.add(X2e, X2e.transpose()), 2 * XY)

    V = H.flatten()

    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
                 # if even vector
                 T.mean(T.sort(V)[((V.shape[0] // 2) - 1):((V.shape[0] // 2) + 1)]),
                 # if odd vector
                 T.sort(V)[V.shape[0] // 2])

    h = T.sqrt(0.5 * h / T.log(X0.shape[0].astype('float32') + 1.0)) / 2.

    Kxy = T.exp(-H / h ** 2 / 2.0)
    dxkxy = -T.dot(Kxy, X0)
    sumkxy = T.sum(Kxy, axis=1).dimshuffle(0, 'x')
    dxkxy = T.add(dxkxy, T.mul(X0, sumkxy)) / (h ** 2)

    return (Kxy, dxkxy, h)
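# --- Hedged usage sketch (not part of the original snippets) ---
# Assumes the `vgd_kernel` defined above is in scope and Theano is importable.
# It compiles the symbolic SVGD kernel into a callable and evaluates it on random
# particles; a full SVGD step would combine Kxy with the score of the target density.
import numpy as np
import theano
import theano.tensor as T

X_sym = T.matrix('X_sym')
Kxy_sym, dxkxy_sym, h_sym = vgd_kernel(X_sym)
kernel_fn = theano.function([X_sym], [Kxy_sym, dxkxy_sym, h_sym],
                            allow_input_downcast=True)

particles = np.random.randn(50, 2).astype('float32')
Kxy_val, dxkxy_val, h_val = kernel_fn(particles)
# e.g. particles += stepsize * (Kxy_val.dot(score(particles)) + dxkxy_val) / particles.shape[0]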
def __init__(self, rng, input3, initial_hidden, n_in, n_hidden):
    self.input3 = input3
    self.initial_hidden = initial_hidden
    matrix1 = numpy.asarray(
        rng.uniform(
            low=-numpy.sqrt(6. / (n_in + n_hidden)),
            high=numpy.sqrt(6. / (n_in + n_hidden)),
            size=(n_in, n_hidden)),
        dtype='float32')
    self.W1 = theano.shared(value=matrix1, name='W1')
    matrix2 = numpy.asarray(
        rng.uniform(
            low=-numpy.sqrt(6. / (n_hidden + n_hidden)),
            high=numpy.sqrt(6. / (n_hidden + n_hidden)),
            size=(n_hidden, n_hidden)),
        dtype='float32')
    self.W2 = theano.shared(value=matrix2, name='W2')
    b_values = numpy.zeros((n_hidden,), dtype='float32')
    self.b = theano.shared(value=b_values, name='b')
    # self.intial_hidden = theano.shared(numpy.zeros(n_hidden, ), dtype = 'float32', name = 'intial_hidden')
    self.output = T.tanh(
        T.add(T.add(T.dot(self.input3, self.W1),
                    T.dot(self.initial_hidden, self.W2)),
              self.b))
    self.params = [self.W2, self.b, self.W1]
def __init__(self, n_in, n_out, input_data_list, activation_fn=tanh):
    self.n_in = n_in
    self.n_out = n_out
    self.activation_fn = activation_fn
    self.w = theano.shared(
        np.asarray(
            np.random.uniform(
                low=-np.sqrt(6.0 / (n_in + n_out)),
                high=np.sqrt(6.0 / (n_in + n_out)),
                size=(n_in, n_out)),
            dtype=theano.config.floatX),
        name='w', borrow=True)
    # self.b = theano.shared(
    #     np.asarray(np.zeros((n_out)), dtype=theano.config.floatX),
    #     name='b', borrow=True)
    # self.params = [self.w, self.b]
    # self.params = [self.w]
    self.b = theano.shared(
        np.asarray(
            np.random.normal(loc=0.0, scale=1.0 / (n_in + n_out), size=(n_out,)),
            dtype=theano.config.floatX),
        name='b', borrow=True)
    self.params = [self.w, self.b]
    # self.w = T._shared(
    #     np.asarray(
    #         np.random.uniform(
    #             low=-np.sqrt(6.0/(n_in+n_out)), high=np.sqrt(6.0/(n_in+n_out)), size=(n_in, n_out)),
    #         dtype=theano.config.floatX),
    #     name='w', borrow=True)
    # self.b = T._shared(
    #     np.asarray(np.zeros((n_out)), dtype=theano.config.floatX),
    #     name='b', borrow=True)
    self.q, self.d = input_data_list
    self.output = [self.activation_fn(T.add(TS.basic.structured_dot(self.q, self.w), self.b)),
                   self.activation_fn(T.add(TS.basic.structured_dot(self.d, self.w), self.b))]
def logp(self, value):
    """
    Calculate log-probability of AR distribution at specified value.

    Parameters
    ----------
    value: numeric
        Value for which log-probability is calculated.

    Returns
    -------
    TensorVariable
    """
    if self.constant:
        x = tt.add(*[self.rho[i + 1] * value[self.p - (i + 1):-(i + 1)]
                     for i in range(self.p)])
        eps = value[self.p:] - self.rho[0] - x
    else:
        if self.p == 1:
            x = self.rho * value[:-1]
        else:
            x = tt.add(*[self.rho[i] * value[self.p - (i + 1):-(i + 1)]
                         for i in range(self.p)])
        eps = value[self.p:] - x

    innov_like = Normal.dist(mu=0.0, tau=self.tau).logp(eps)
    init_like = self.init.logp(value[:self.p])

    return tt.sum(innov_like) + tt.sum(init_like)
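# --- Hedged NumPy check (illustrative, not from the original source) ---
# Shows how the slices in `logp` above line up: rho[i] multiplies the series lagged
# by i + 1 steps relative to value[p:], which is what tt.add(*[...]) sums over.
import numpy as np

value = np.arange(10.0)            # toy series
p = 2
rho = np.array([0.5, 0.25])        # hypothetical AR(2) coefficients
x = sum(rho[i] * value[p - (i + 1):-(i + 1)] for i in range(p))
eps = value[p:] - x                # innovations, same role as `eps` in logp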
def output(self, train):
    X = self.get_input(train)  # shape: (nb_samples, time (padded with zeros at the end), input_dim)
    # new shape: (time, nb_samples, input_dim) -> because theano.scan iterates over main dimension
    X = X.dimshuffle((1, 0, 2))
    xf = self.activation(T.dot(X, self.W_if) + self.b_if)
    xb = self.activation(T.dot(X, self.W_ib) + self.b_ib)
    b_o = self.b_o
    b_on = T.repeat(T.repeat(b_o.reshape((1, self.output_dim)), X.shape[0], axis=0).reshape((1, X.shape[0], self.output_dim)), X.shape[1], axis=0)

    # Iterate forward over the first dimension of the x array (=time).
    outputs_f, updates_f = theano.scan(
        self._step,  # this will be called with arguments (sequences[i], outputs[i-1], non_sequences[i])
        sequences=xf,  # tensors to iterate over, inputs to _step
        # initialization of the output. Input to _step with default tap=-1.
        outputs_info=alloc_zeros_matrix(X.shape[1], self.output_dim),
        non_sequences=[self.W_ff, self.b_f],  # static inputs to _step
        truncate_gradient=self.truncate_gradient
    )
    # Iterate backward over the first dimension of the x array (=time).
    outputs_b, updates_b = theano.scan(
        self._step,  # this will be called with arguments (sequences[i], outputs[i-1], non_sequences[i])
        sequences=xb,  # tensors to iterate over, inputs to _step
        # initialization of the output. Input to _step with default tap=-1.
        outputs_info=alloc_zeros_matrix(X.shape[1], self.output_dim),
        non_sequences=[self.W_bb, self.b_b],  # static inputs to _step
        truncate_gradient=self.truncate_gradient,
        go_backwards=True  # Iterate backwards through time
    )
    # return outputs_f.dimshuffle((1, 0, 2))
    if self.return_sequences:
        return T.add(
            T.tensordot(
                T.add(outputs_f.dimshuffle((1, 0, 2)),
                      outputs_b[::-1].dimshuffle((1, 0, 2))),
                self.W_o, [[2], [0]]),
            b_on)
    return T.concatenate((outputs_f[-1], outputs_b[0]))
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ifco, b_ifco,
                   ln_b1, ln_s1, ln_b2, ln_s2, ln_b3, ln_s3, t_n_out):
    cur_w_in_sig_ln = self.ln(cur_w_in_sig, ln_b1, ln_s1)

    pre_w_out_sig = T.dot(pre_out_sig, w_ifco)
    pre_w_out_sig_ln = self.ln(pre_w_out_sig, ln_b2, ln_s2)

    preact = T.add(cur_w_in_sig_ln, pre_w_out_sig_ln, b_ifco)

    inner_act = self.activation  # T.nnet.hard_sigmoid # T.tanh
    gate_act = self.sigmoid()  # T.nnet.hard_sigmoid # T.nnet.sigmoid

    # Input Gate
    ig_t1 = gate_act(preact[:, 0:t_n_out])
    # Forget Gate
    fg_t1 = gate_act(preact[:, 1 * t_n_out:2 * t_n_out])
    # Cell State
    cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig),
                  T.mul(ig_t1, inner_act(preact[:, 2 * t_n_out:3 * t_n_out])))

    mask = T.addbroadcast(mask, 1)
    cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig
    cs_t1_ln = self.ln(cs_t1, ln_b3, ln_s3)

    # Output Gate
    og_t1 = gate_act(preact[:, 3 * t_n_out:4 * t_n_out])
    # Output LSTM
    out_sig = T.mul(og_t1, inner_act(cs_t1_ln))

    out_sig = mask * out_sig + (1. - mask) * pre_out_sig

    return [out_sig, cs_t1]
def calc_output(self, in_tens):
    if in_tens.ndim == 1:
        prod = T.dot(self.W, in_tens)
        return T.add(prod, self.b)
    elif in_tens.ndim == 2:
        # batched inputs
        prod = T.dot(self.W, in_tens)
        return T.add(self.b[:, None], prod)
def output(self, train):
    X = self.get_input(train)
    X = X.dimshuffle((1, 0, 2))

    if self.is_entity:
        Entity = X[-1:].dimshuffle(1, 0, 2)
        X = X[:-1]

    b_y = self.b_y
    b_yn = T.repeat(T.repeat(b_y.reshape((1, self.output_dim)), X.shape[0], axis=0).reshape((1, X.shape[0], self.output_dim)), X.shape[1], axis=0)

    xif = T.dot(X, self.W_if) + self.b_if
    xib = T.dot(X, self.W_ib) + self.b_ib
    xff = T.dot(X, self.W_ff) + self.b_ff
    xfb = T.dot(X, self.W_fb) + self.b_fb
    xcf = T.dot(X, self.W_cf) + self.b_cf
    xcb = T.dot(X, self.W_cb) + self.b_cb
    xof = T.dot(X, self.W_of) + self.b_of
    xob = T.dot(X, self.W_ob) + self.b_ob

    [outputs_f, memories_f], updates_f = theano.scan(
        self._step,
        sequences=[xif, xff, xof, xcf],
        outputs_info=[
            alloc_zeros_matrix(X.shape[1], self.output_dim),
            alloc_zeros_matrix(X.shape[1], self.output_dim)
        ],
        non_sequences=[self.U_if, self.U_ff, self.U_of, self.U_cf],
        truncate_gradient=self.truncate_gradient
    )
    [outputs_b, memories_b], updates_b = theano.scan(
        self._step,
        sequences=[xib, xfb, xob, xcb],
        outputs_info=[
            alloc_zeros_matrix(X.shape[1], self.output_dim),
            alloc_zeros_matrix(X.shape[1], self.output_dim)
        ],
        non_sequences=[self.U_ib, self.U_fb, self.U_ob, self.U_cb],
        truncate_gradient=self.truncate_gradient
    )
    if self.return_sequences:
        y = T.add(T.add(
                T.tensordot(outputs_f.dimshuffle((1, 0, 2)), self.W_yf, [[2], [0]]),
                T.tensordot(outputs_b[::-1].dimshuffle((1, 0, 2)), self.W_yb, [[2], [0]])),
            b_yn)
        # y = T.add(T.tensordot(
        #     T.add(outputs_f.dimshuffle((1, 0, 2)),
        #           outputs_b[::-1].dimshuffle((1, 0, 2))),
        #     self.W_y, [[2], [0]]), b_yn)
        if self.is_entity:
            return T.concatenate([y, Entity], axis=1)
        else:
            return y
    return T.concatenate((outputs_f[-1], outputs_b[0]))
def f1_score(self, y):
    n_total = y.shape[0]
    n_relevant_documents_predicted = T.sum(T.eq(T.ones(self.y_pred.shape), self.y_pred))
    two_vector = T.add(T.ones(self.y_pred.shape), T.ones(self.y_pred.shape))
    n_relevant_predicted_correctly = T.sum(T.eq(T.add(self.y_pred, y), two_vector))
    precision = T.true_div(n_relevant_predicted_correctly, n_relevant_documents_predicted)
    recall = T.true_div(n_relevant_predicted_correctly, n_total)
    f1_score = T.mul(2.0, T.true_div(T.mul(precision, recall), T.add(precision, recall)))
    return [f1_score, precision, recall]
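# --- Hedged NumPy check (illustrative, not from the original source) ---
# The symbolic f1_score above counts a prediction as correct when y_pred + y == 2,
# i.e. both are 1. The same counts on hypothetical binary labels:
import numpy as np

y_pred = np.array([1, 0, 1, 1, 0])
y_true = np.array([1, 1, 1, 0, 0])
n_pred = np.sum(y_pred == 1)
n_correct = np.sum((y_pred + y_true) == 2)
precision = n_correct / n_pred
recall = n_correct / y_true.shape[0]   # note: the snippet divides by the total count, not the positive count
f1 = 2.0 * precision * recall / (precision + recall)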
def _generate_pred_model_function(self):
    u = T.iscalar('u')
    i = T.iscalar('i')
    pred = T.add(T.dot(self.W[u], self.H.T), self.B)
    self.get_user_res = theano.function(inputs=[u], outputs=pred)
    pred2A = T.dot(self.W[u], self.H[i].T)
    pred2 = T.add(pred2A, self.B[i])
    self.get_user_item_res = theano.function(inputs=[u, i], outputs=pred2)
def __call__(self, M, *inputs):
    summands = [Xi.dot(Wiz) for (Xi, Wiz) in zip(inputs, self.Wizs)] + [M.dot(self.Wmz), self.bz]
    z = TT.nnet.sigmoid(TT.add(*summands))
    summands = [Xi.dot(Wir) for (Xi, Wir) in zip(inputs, self.Wirs)] + [M.dot(self.Wmr), self.br]
    r = TT.nnet.sigmoid(TT.add(*summands))
    summands = [Xi.dot(Wim) for (Xi, Wim) in zip(inputs, self.Wims)] + [(r * M).dot(self.Wmm), self.bm]
    Mtarg = TT.tanh(TT.add(*summands))  # pylint: disable=E1111
    Mnew = (1 - z) * M + z * Mtarg
    return Mnew
def scan_y(cur_step):
    # Compute pairwise affinities
    sum_y = tensor.sum(tensor.square(y_arg), 1)
    num = 1 / (1 + tensor.add(tensor.add(-2 * tensor.dot(y_arg, y_arg.T), sum_y).T, sum_y))
    num = tensor.set_subtensor(num[range(n), range(n)], 0)
    Q = num / tensor.sum(num)
    Q = tensor.maximum(Q, 1e-12)

    PQ = p_arg - Q

    def inner(pq_i, num_i, y_arg_i):
        return tensor.sum(tensor.tile(pq_i * num_i, (no_dims, 1)).T * (y_arg_i - y_arg), 0)

    dy_arg, _ = theano.scan(inner, outputs_info=None, sequences=[PQ, num, y_arg])
    dy_arg = tensor.cast(dy_arg, FLOATX)
    # dy_arg = y_arg

    momentum = ifelse(tensor.lt(cur_step, 20), initial_momentum_f, final_momentum_f)

    indexsa = tensor.neq((dy_arg > 0), (iy_arg > 0)).nonzero()
    indexsb = tensor.eq((dy_arg > 0), (iy_arg > 0)).nonzero()
    resulta = tensor.set_subtensor(gains_arg[indexsa], gains_arg[indexsa] + 0.2)
    resultb = tensor.set_subtensor(resulta[indexsb], resulta[indexsb] * 0.8)
    indexs_min = (resultb < min_gain_f).nonzero()
    new_gains_arg = tensor.set_subtensor(resultb[indexs_min], min_gain_f)

    # last step in simple version of SNE
    new_iy_arg = momentum * iy_arg - eta * (new_gains_arg * dy_arg)
    new_y_arg = y_arg + new_iy_arg
    new_y_arg = new_y_arg - tensor.tile(tensor.mean(new_y_arg, 0), (n, 1))

    # # Compute current value of cost function
    # if (cur_step + 1) % 10 == 0:
    #     C = tensor.sum(p_arg * tensor.log(p_arg / Q))
    #     print "Iteration ", (cur_step + 1), ": error is ", C

    # Stop lying about P-values
    # new_p_arg = p_arg
    # if cur_step == 2:
    #     new_p_arg = p_arg / 4
    #     p_arg = p_arg / 4
    #     p_arg.set_value(p_arg.get_value / 4)
    new_p_arg = ifelse(tensor.eq(cur_step, 100), p_arg / 4, p_arg)

    return [(y_arg, new_y_arg), (iy_arg, new_iy_arg),
            (gains_arg, new_gains_arg), (p_arg, new_p_arg)]
def __call__(self, M, *inputs):
    assert len(inputs) == len(self.Wizs)
    summands = [Xi.dot(Wiz) for (Xi, Wiz) in zip(inputs, self.Wizs)] + [M.dot(self.Wmz), self.bz]
    z = TT.nnet.sigmoid(TT.add(*summands))
    summands = [Xi.dot(Wir) for (Xi, Wir) in zip(inputs, self.Wirs)] + [M.dot(self.Wmr), self.br]
    r = TT.nnet.sigmoid(TT.add(*summands))
    summands = [Xi.dot(Wim) for (Xi, Wim) in zip(inputs, self.Wims)] + [(r * M).dot(self.Wmm), self.bm]
    Mtarg = TT.tanh(TT.add(*summands))  # pylint: disable=E1111
    Mnew = (1 - z) * M + z * Mtarg
    return Mnew
def output(self, train):
    X = self.get_input(train)  # shape: (nb_samples, time (padded with zeros at the end), input_dim)
    # new shape: (time, nb_samples, input_dim) -> because theano.scan iterates over main dimension
    X = X.dimshuffle((1, 0, 2))
    lenX = X.shape[0]
    Entity = X[lenX - 1:].dimshuffle(1, 0, 2)
    X = X[:lenX - 1]

    b_o = self.b_o
    b_on = T.repeat(T.repeat(b_o.reshape((1, self.output_dim)), X.shape[0], axis=0).reshape((1, X.shape[0], self.output_dim)), X.shape[1], axis=0)

    xf = self.activation(T.dot(X, self.W_if) + self.b_if)
    xb = self.activation(T.dot(X, self.W_ib) + self.b_ib)

    # Iterate forward over the first dimension of the x array (=time).
    outputs_f, updates_f = theano.scan(
        self._step,  # this will be called with arguments (sequences[i], outputs[i-1], non_sequences[i])
        sequences=xf,  # tensors to iterate over, inputs to _step
        # initialization of the output. Input to _step with default tap=-1.
        outputs_info=alloc_zeros_matrix(X.shape[1], self.output_dim),
        non_sequences=[self.W_ff, self.b_f],  # static inputs to _step
        truncate_gradient=self.truncate_gradient)
    # Iterate backward over the first dimension of the x array (=time).
    outputs_b, updates_b = theano.scan(
        self._step,  # this will be called with arguments (sequences[i], outputs[i-1], non_sequences[i])
        sequences=xb,  # tensors to iterate over, inputs to _step
        # initialization of the output. Input to _step with default tap=-1.
        outputs_info=alloc_zeros_matrix(X.shape[1], self.output_dim),
        non_sequences=[self.W_bb, self.b_b],  # static inputs to _step
        truncate_gradient=self.truncate_gradient,
        go_backwards=True  # Iterate backwards through time
    )
    # return outputs_f.dimshuffle((1, 0, 2))
    if self.return_sequences:
        return T.concatenate([
            T.add(
                T.tensordot(
                    T.add(outputs_f.dimshuffle((1, 0, 2)),
                          outputs_b[::-1].dimshuffle((1, 0, 2))),
                    self.W_o, [[2], [0]]),
                b_on),
            Entity
        ], axis=1)
    return T.concatenate((outputs_f[-1], outputs_b[0]))
def __init__(self, **kwargs):
    super(ResNet, self).__init__(**kwargs)
    assert self.status[1] == 2, "Only accept 2 sources!"
    assert self.status[0], "Only accept cnn layers!"
    x = self.sources[0]
    f_x = self.sources[1]
    time = x.output.shape[0]
    batch = x.output.shape[1]

    self.input = T.add(x.Output, f_x.Output)
    self.Output = T.nnet.relu(self.input)

    if self.attrs['batch_norm']:
        self.Output = self.batch_norm(
            h=self.Output.reshape(
                (self.Output.shape[0],
                 self.Output.shape[1] * self.Output.shape[2] * self.Output.shape[3])),
            dim=self.attrs['n_out'],
            force_sample=self.force_sample).reshape(self.Output.shape)

    output2 = self.Output.dimshuffle(0, 2, 3, 1)  # (time*batch, out-row, out-col, nb feature maps)
    self.output = output2.reshape(
        (time, batch, output2.shape[1] * output2.shape[2] * output2.shape[3]))  # (time, batch, out-dim)
def splittings(omega, x, l):
    vals = []
    for n in range(1, n2 + 1):  # 0 to 35?
        area = 0
        kern = np.loadtxt("kerns/l.{l:.0f}_n.{n:.0f}".format(l=l, n=n), skiprows=1)
        # This is bad:
        if x.size < 4800:
            v = int(x.size / n2)
            kern = kern[0::v]
        # Shouldn't this just be a dot product?
        for j in range(1, x.size):
            area = tt.add(area, (x[j] - x[j - 1]) * tt.dot(omega[j], kern[j]))
        beta_mask = (beta[:, 0] == l) * (beta[:, 1] == n)
        delta = tt.dot(beta[beta_mask, 2], area)
        vals.append(delta)
    vals = tt.as_tensor_variable(vals)
    vals = tt.squeeze(vals)
    print("vals")
    print(vals.tag.test_value)
    return vals
def test_meta_classes():
    vec_tt = tt.vector('vec')
    vec_m = MetaSymbol.from_obj(vec_tt)
    assert vec_m.obj == vec_tt
    assert type(vec_m) == MetaTensorVariable

    # This should invalidate the underlying base object.
    vec_m.index = 0
    assert vec_m.obj is None
    assert vec_m.reify().type == vec_tt.type
    assert vec_m.reify().name == vec_tt.name

    vec_type_m = vec_m.type
    assert type(vec_type_m) == MetaTensorType
    assert vec_type_m.dtype == vec_tt.dtype
    assert vec_type_m.broadcastable == vec_tt.type.broadcastable
    assert vec_type_m.name == vec_tt.type.name

    assert graph_equal(tt.add(1, 2), mt.add(1, 2).reify())

    meta_var = mt.add(1, var()).reify()
    assert isinstance(meta_var, MetaTensorVariable)
    assert isinstance(meta_var.owner.op.obj, theano.Op)
    assert isinstance(meta_var.owner.inputs[0].obj, tt.TensorConstant)

    test_vals = [1, 2.4]
    meta_vars = MetaSymbol.from_obj(test_vals)
    assert meta_vars == [MetaSymbol.from_obj(x) for x in test_vals]
def th_logp(self, prior=False, noise=False):
    if prior:
        random_vars = self.model.free_RVs
    else:
        random_vars = self.model.basic_RVs
    factors = [var.logpt for var in random_vars] + self.model.potentials
    return tt.add(*map(tt.sum, factors))
def hmc_updates(positions, stepsize, avg_acceptance_rate, final_pos, accept,
                target_acceptance_rate, stepsize_inc, stepsize_dec,
                stepsize_min, stepsize_max, avg_acceptance_slowness):
    # broadcast `accept` scalar to tensor with the same dimensions as final_pos.
    accept_matrix = accept.dimshuffle(0, *(('x',) * (final_pos.ndim - 1)))
    # if accept is True, update to `final_pos` else stay put
    new_positions = TT.switch(accept_matrix, final_pos, positions)

    ## STEPSIZE UPDATES ##
    # if acceptance rate is too low, our sampler is too "noisy" and we reduce
    # the stepsize. If it is too high, our sampler is too conservative, we can
    # get away with a larger stepsize (resulting in better mixing).
    _new_stepsize = TT.switch(avg_acceptance_rate > target_acceptance_rate,
                              stepsize * stepsize_inc, stepsize * stepsize_dec)
    # maintain stepsize in [stepsize_min, stepsize_max]
    new_stepsize = TT.clip(_new_stepsize, stepsize_min, stepsize_max)

    # perform exponential moving average
    mean_dtype = theano.scalar.upcast(accept.dtype, avg_acceptance_rate.dtype)
    new_acceptance_rate = TT.add(
        avg_acceptance_slowness * avg_acceptance_rate,
        (1.0 - avg_acceptance_slowness) * accept.mean(dtype=mean_dtype))

    return [(positions, new_positions),
            (stepsize, new_stepsize),
            (avg_acceptance_rate, new_acceptance_rate)]
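# --- Hedged NumPy sketch of the same step-size rule (hypothetical numbers) ---
# Shrink the step size when the running acceptance rate is below target, grow it
# otherwise, clip to [min, max], and keep an exponential moving average of acceptance.
import numpy as np

stepsize, avg_accept = 0.1, 0.9
target, inc, dec = 0.65, 1.02, 0.98
step_min, step_max, slowness = 1e-4, 0.5, 0.9
accept = np.array([1.0, 0.0, 1.0])   # accept flags for a batch of chains

stepsize = np.clip(stepsize * (inc if avg_accept > target else dec), step_min, step_max)
avg_accept = slowness * avg_accept + (1.0 - slowness) * accept.mean()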
def sum_logdets(self):
    dets = [self.logdet]
    current = self
    while not current.isroot:
        current = current.parent
        dets.append(current.logdet)
    return tt.add(*dets)
def __call__(self, X):
    XY = X.dot(X.T)
    x2 = tt.sum(X ** 2, axis=1).dimshuffle(0, 'x')
    X2e = tt.repeat(x2, X.shape[0], axis=1)
    H = X2e + X2e.T - 2. * XY

    V = tt.sort(H.flatten())
    length = V.shape[0]
    # median distance
    m = tt.switch(tt.eq((length % 2), 0),
                  # if even vector
                  tt.mean(V[((length // 2) - 1):((length // 2) + 1)]),
                  # if odd vector
                  V[length // 2])
    h = .5 * m / tt.log(floatX(H.shape[0]) + floatX(1))

    # RBF
    Kxy = tt.exp(-H / h / 2.0)

    # Derivative
    dxkxy = -tt.dot(Kxy, X)
    sumkxy = tt.sum(Kxy, axis=1).dimshuffle(0, 'x')
    dxkxy = tt.add(dxkxy, tt.mul(X, sumkxy)) / h

    return Kxy, dxkxy
def __init__(self, net, mixfrac=1.0, maxiter=25):
    # print('the mixfrac=', mixfrac)  # e.g. 0.1
    # mixfrac = 1.0
    EzPickle.__init__(self, net, mixfrac, maxiter)
    self.net = net
    self.mixfrac = mixfrac
    self.ez_for_net = EzFlat(self.net.trainable_weights)

    x_nx = net.input
    self.predict = theano.function([x_nx], net.output, **FNOPTS)

    ypred_ny = net.output
    ytarg_ny = T.matrix("ytarg")
    var_list = net.trainable_weights  # trainable parameters of the value-function net
    l2 = 1e-3 * T.add(*[T.square(v).sum() for v in var_list])
    N = x_nx.shape[0]
    mse = T.sum(T.square(ytarg_ny - ypred_ny)) / N
    symb_args = [x_nx, ytarg_ny]
    loss = mse + l2
    self.opt = LbfgsOptimizer(loss, var_list, symb_args, maxiter=maxiter,
                              extra_losses={"mse": mse, "l2": l2})
def ctc_loss(y_true, y_pred):
    def path_probs(predict, y_sym):
        pred_y = predict[:, y_sym]
        rr = recurrence_relation(y_sym.shape[0])

        def step(p_curr, p_prev, rr):
            return p_curr * T.dot(p_prev, rr)

        probabilities, _ = theano.scan(
            step,
            sequences=[pred_y],
            outputs_info=[T.eye(y_sym.shape[0])[0]],
            non_sequences=[rr]
        )
        return probabilities

    y_sym_a = T.argmax(y_true, axis=-1)
    n = T.cast(T.add(T.mul(2, y_true.shape[0] - T.sum(y_true[:, -1])), 1), 'int16')
    y_sym = T.cast(y_sym_a[:n], 'int16')
    y_pred = T.clip(y_pred, epsilon, 1.0 - epsilon)

    forward_probs = path_probs(y_pred, y_sym)
    backward_probs = path_probs(y_pred[::-1], y_sym[::-1])[::-1, ::-1]
    probs = forward_probs * backward_probs / y_pred[:, y_sym]
    total_probs = T.sum(probs)
    # total_probs = T.sum(forward_probs[-1, -2:])
    return -T.log(total_probs)
def __call__(self, X):
    XY = X.dot(X.T)
    x2 = tt.sum(X ** 2, axis=1).dimshuffle(0, 'x')
    X2e = tt.repeat(x2, X.shape[0], axis=1)
    H = X2e + X2e.T - 2. * XY

    V = tt.sort(H.flatten())
    length = V.shape[0]
    # median distance
    m = tt.switch(
        tt.eq((length % 2), 0),
        # if even vector
        tt.mean(V[((length // 2) - 1):((length // 2) + 1)]),
        # if odd vector
        V[length // 2])
    h = .5 * m / tt.log(floatX(H.shape[0]) + floatX(1))

    # RBF
    Kxy = tt.exp(-H / h / 2.0)

    # Derivative
    dxkxy = -tt.dot(Kxy, X)
    sumkxy = tt.sum(Kxy, axis=-1, keepdims=True)
    dxkxy = tt.add(dxkxy, tt.mul(X, sumkxy)) / h

    return Kxy, dxkxy
def test_softmax_optimizations_w_bias2(self):
    x = tensor.matrix('x')
    b = tensor.vector('b')
    c = tensor.vector('c')
    one_of_n = tensor.lvector('one_of_n')
    op = crossentropy_categorical_1hot

    env = gof.Env(
        [x, b, c, one_of_n],
        [op(softmax(T.add(x, b, c)), one_of_n)])

    assert env.outputs[0].owner.op == op

    print('BEFORE')
    for node in env.toposort():
        print(node.op)
    print('----')

    theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN).optimize(env)

    print('AFTER')
    for node in env.toposort():
        print(node.op)
    print('====')

    assert len(env.toposort()) == 3
    assert str(env.outputs[0].owner.op) == 'OutputGuard'
    assert env.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
def logp(self, z):
    factors = ([tt.sum(var.logpt) for var in self.model.basic_RVs] +
               [tt.sum(var) for var in self.model.potentials])
    p = self.approx.to_flat_input(tt.add(*factors))
    p = theano.clone(p, {self.input: z})
    return p
def _lmul(self, x, T):
    if T:
        if len(self.col_shape()) > 1:
            x2 = x.flatten(2)
        else:
            x2 = x
        n_rows = x2.shape[0]
        offset = 0
        xWlist = []
        assert len(self._col_sizes) == len(self._Wlist)
        for size, W in zip(self._col_sizes, self._Wlist):
            # split the output rows into pieces
            x_s = x2[:, offset:offset + size]
            # multiply each piece by one transform
            xWlist.append(
                W.lmul(
                    x_s.reshape((n_rows,) + W.col_shape()),
                    T))
            offset += size
        # sum the results
        rval = tensor.add(*xWlist)
    else:
        # multiply the input by each transform
        xWlist = [W.lmul(x, T).flatten(2) for W in self._Wlist]
        # join the results
        rval = tensor.join(1, *xWlist)
    return rval
def run_MCMC_ARp(x, y, draws, p, resmdl):
    phi_means = resmdl.params[:p]
    phi_sd = resmdl.bse[:p]
    with Model() as model8:
        alpha = Normal('alpha', mu=0, sd=10)
        beta = Normal('beta', mu=0, sd=10)
        sd = HalfNormal('sd', sd=10)
        phi = Normal('phi', mu=phi_means, sd=phi_sd, shape=p)

        y = tt.as_tensor(y)
        x = tt.as_tensor(x)
        y_r = y[p:]
        x_r = x[p:]

        resids = y - beta * x - alpha
        u = tt.add(*[phi[i] * resids[p - (i + 1):-(i + 1)] for i in range(p)])
        mu = alpha + beta * x_r + u
        data = Normal('y_r', mu=mu, sd=sd, observed=y_r)

    with model8:
        if p == 1:
            step = None
        else:
            step = Metropolis([phi])
        tune = int(draws / 5)
        trace = sample(draws, tune=tune, step=step, progressbar=False)

    print(summary(trace, varnames=['alpha', 'beta', 'sd', 'phi']))
    # plt.show(forestplot(trace, varnames=['alpha', 'beta', 'sd', 'phi']))
    # traceplot(trace, varnames=['alpha', 'beta', 'sd', 'phi'])
    return trace
def logpt(self):
    """Theano scalar of log-probability of the model"""
    with self:
        factors = [var.logpt for var in self.basic_RVs] + self.potentials
        logp = tt.add(*map(tt.sum, factors))
        logp.name = '__logp'
    return logp
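# --- Hedged sketch (assumes PyMC3 on the Theano backend; toy model is hypothetical) ---
# The `logpt` property above sums one log-probability factor per random variable;
# the same quantity can be assembled by hand for a small model:
import numpy as np
import pymc3 as pm
import theano.tensor as tt

with pm.Model() as toy:
    mu = pm.Normal('mu', mu=0.0, sd=1.0)
    obs = pm.Normal('obs', mu=mu, sd=1.0, observed=np.array([0.1, -0.3, 0.5]))

factors = [var.logpt for var in toy.basic_RVs] + toy.potentials
joint_logp = tt.add(*map(tt.sum, factors))   # same quantity as toy.logpt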
def __init__(self, net, mixfrac=1.0, maxiter=25):
    EzPickle.__init__(self, net, mixfrac, maxiter)
    self.net = net
    self.mixfrac = mixfrac

    x_nx = net.input  # input layer of the keras net
    self.predict = theano.function([x_nx], net.output, **FNOPTS)  # compiled theano func

    ypred_ny = net.output  # output layer of the keras net
    ytarg_ny = T.matrix("ytarg")
    var_list = net.trainable_weights

    # l2 regularization with reg coeff of 1e-3
    l2 = 1e-3 * T.add(*[T.square(v).sum() for v in var_list])
    N = x_nx.shape[0]
    mse = T.sum(T.square(ytarg_ny - ypred_ny)) / N  # mean squared error
    symb_args = [x_nx, ytarg_ny]
    loss = mse + l2  # loss = mse + l2 reg

    # set the optimizer as the manually coded LBFGS optimizer
    self.opt = LbfgsOptimizer(loss, var_list, symb_args, maxiter=maxiter,
                              extra_losses={"mse": mse, "l2": l2})
def rbf_kernel(X):
    XY = T.dot(X, X.T)
    x2 = T.sum(X ** 2, axis=1).dimshuffle(0, 'x')
    X2e = T.repeat(x2, X.shape[0], axis=1)
    H = X2e + X2e.T - 2. * XY

    V = H.flatten()

    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
                 # if even vector
                 T.mean(T.sort(V)[((V.shape[0] // 2) - 1):((V.shape[0] // 2) + 1)]),
                 # if odd vector
                 T.sort(V)[V.shape[0] // 2])

    h = .5 * h / T.log(T.cast(H.shape[0] + 1., theano.config.floatX))

    # compute the rbf kernel
    kxy = T.exp(-H / h / 2.0)

    dxkxy = -T.dot(kxy, X)
    sumkxy = T.sum(kxy, axis=1).dimshuffle(0, 'x')
    dxkxy = T.add(dxkxy, T.mul(X, sumkxy)) / h

    return kxy, dxkxy
def _mean_h_given_v(self, v):
    alpha = self.usable_alpha()
    return tensor.add(
        self.b,
        -0.5 * ldot(v * v, self.Lambda) if self.Lambda else 0,
        self.mu * ldot(v, self.W),
        0.5 * tensor.sqr(ldot(v, self.W)) / alpha)
def add_merge_MultiBatchBeamGradAddOp(node):
    if node.op != T.add:
        return False
    if len(node.inputs) < 2:
        return False
    grad_op_idx = None
    grad_op_v = None
    grad_op = None
    for i, input in enumerate(node.inputs):
        if input.owner and isinstance(input.owner.op, MultiBatchBeamGradAddOp):
            grad_op = input.owner.op
            if not grad_op.inplace:  # we cannot merge when we operate inplace on it
                grad_op_v = input
                grad_op_idx = i
                break
    if grad_op_idx is None:
        return False
    sum_inputs = [node.inputs[i] for i in range(len(node.inputs)) if i != grad_op_idx]
    if grad_op.zero_with_shape:
        # Make new grad_op without zero_with_shape.
        kwargs = {k: getattr(grad_op, k) for k in grad_op.__props__}
        kwargs["zero_with_shape"] = False
        grad_op = grad_op.__class__(**kwargs)
    else:
        old_grad_op_input0 = grad_op_v.owner.inputs[0]
        sum_inputs = [old_grad_op_input0] + sum_inputs
    assert len(sum_inputs) > 0
    if len(sum_inputs) == 1:
        new_grad_op_input0 = sum_inputs[0]
    else:
        new_grad_op_input0 = T.add(*sum_inputs)
    new_grad_op_inputs = [new_grad_op_input0] + grad_op_v.owner.inputs[1:]
    new_v = grad_op(*new_grad_op_inputs)
    return [new_v]
def logp_norm(self, z):
    t = self.approx.normalizing_constant
    factors = [tt.sum(var.logpt) / t
               for var in self.model.basic_RVs + self.model.potentials]
    logpt = tt.add(*factors)
    p = self.approx.to_flat_input(logpt)
    p = theano.clone(p, {self.input: z})
    return p
def variational_gradient_estimate(
        vars, model, minibatch_RVs=[], minibatch_tensors=[], total_size=None,
        n_mcsamples=1, random_seed=20090425):
    """Calculate approximate ELBO and its (stochastic) gradient."""

    theano.config.compute_test_value = 'ignore'
    shared = make_shared_replacements(vars, model)

    # Correction sample size
    r = 1 if total_size is None else \
        float(total_size) / minibatch_tensors[0].shape[0]

    other_RVs = set(model.basic_RVs) - set(minibatch_RVs)
    factors = [r * var.logpt for var in minibatch_RVs] + \
              [var.logpt for var in other_RVs] + model.potentials
    logpt = tt.add(*map(tt.sum, factors))

    [logp], inarray = join_nonshared_inputs([logpt], vars, shared)

    uw = dvector('uw')
    uw.tag.test_value = np.concatenate([inarray.tag.test_value,
                                        inarray.tag.test_value])

    elbo = elbo_t(logp, uw, inarray,
                  n_mcsamples=n_mcsamples, random_seed=random_seed)

    # Gradient
    grad = gradient(elbo, [uw])

    return grad, elbo, shared, uw
def create_weight_update_with_momentum_functions(self):
    weight_updates_with_momentum = []
    for i in range(len(self.weights)):
        weight_updates_with_momentum.append(
            (self.weights[i], g(T.add(self.weights[i], self.H.L.momentum_weights[i]))))
    self.weight_updates_with_momentum_function = function(
        inputs=[], updates=weight_updates_with_momentum)
def mcmc(ll, *frvs):
    proposals = [s_rng.local_proposal(v, rvs) for v, rvs in zip(free_RVs, frvs)]
    proposals_rev = [s_rng.local_proposal(v, rvs) for v, rvs in zip(free_RVs, proposals)]

    full_observations = dict(observations)
    full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, proposals)]))
    new_log_likelihood = full_log_likelihood(full_observations)

    logratio = new_log_likelihood - ll \
        + tensor.add(*[tensor.sum(lpdf(p, r)) for p, r in zip(proposals_rev, frvs)]) \
        - tensor.add(*[tensor.sum(lpdf(p, r)) for p, r in zip(proposals, proposals)])

    accept = tensor.gt(logratio, tensor.log(U))

    return [tensor.switch(accept, new_log_likelihood, ll)] + \
        [tensor.switch(accept, p, f) for p, f in zip(proposals, frvs)], \
        {}, theano.scan_module.until(accept)
def logp_norm(self, z):
    t = self.approx.normalizing_constant
    factors = ([tt.sum(var.logpt) / t for var in self.model.basic_RVs] +
               [tt.sum(var) / t for var in self.model.potentials])
    logpt = tt.add(*factors)
    p = self.approx.to_flat_input(logpt)
    p = theano.clone(p, {self.input: z})
    return p
def train(self, train_inputs, train_targets, optimizer=lgn.updates.adagrad,
          minibatch_size=None, n_epochs=1000, optimizer_kwargs=None,
          objective=lgn.objectives.squared_error):
    print("training network")

    # loss function
    lgn_outputs = lgn.layers.get_output(
        [lgn.layers.ReshapeLayer(self.params[o].output,
                                 (self.batch_size, self.seq_len,
                                  self.params[o].output.num_units))
         for o in train_targets],
        deterministic=False)
    target_vars = [T.ftensor3("%s_targets" % o) for o in train_targets]
    losses = [objective(o, t).mean()
              for o, t in zip(lgn_outputs, target_vars)]

    # sum the losses for all the outputs to get the overall objective
    if len(losses) == 1:
        loss = losses[0]
    else:
        loss = T.add(*losses)

    # compile training update function
    params = lgn.layers.get_all_params([self.params[o].output
                                        for o in train_targets],
                                       trainable=True)
    if optimizer_kwargs is None:
        optimizer_kwargs = {}
    updates = optimizer(loss, params, **optimizer_kwargs)
    self.train_func = theano.function(
        [self.params[x].input.input_var for x in train_inputs] + target_vars,
        loss, updates=updates)

    # print("layers", lgn.layers.get_all_layers([self.params[o].output
    #                                            for o in train_targets]))
    # print("params", lgn.layers.get_all_params([self.params[o].output
    #                                            for o in train_targets]))

    # run training epochs
    with ProgressTracker(n_epochs, TerminalProgressBar()) as progress:
        n_inputs = len(list(train_inputs.values())[0])
        minibatch_size = minibatch_size or n_inputs
        for _ in range(n_epochs):
            indices = np.random.permutation(n_inputs)
            for start in range(0, n_inputs - minibatch_size + 1,
                               minibatch_size):
                minibatch = indices[start:start + minibatch_size]
                self.train_func(*(
                    [train_inputs[x][minibatch] for x in train_inputs] +
                    [train_targets[x][minibatch] for x in train_targets]))
            progress.step()

    print("training complete")
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, w_hidden_hidden, b_act):
    pre_w_sig = T.dot(pre_out_sig, w_hidden_hidden)
    inner_act = self.activation

    out_sig = inner_act(T.add(cur_w_in_sig, pre_w_sig, b_act))

    mask = T.addbroadcast(mask, 1)
    out_sig_m = mask * out_sig + (1. - mask) * pre_out_sig
    return [out_sig_m]
def mean_conv_v_given_s_h(self, s, h, With_fast):
    """Return the mean of binary-valued visible units v, given h and s"""
    W = self.get_filters(With_fast)
    conv_v_bias = self.get_conv_v_bias(With_fast)
    shW = self.convdot(s * h, W)
    rval = nnet.sigmoid(tensor.add(shW, conv_v_bias))
    return rval
def add_matrix(matrix1, matrix2):
    if len(matrix1) != len(matrix2) or len(matrix1[0]) != len(matrix2[0]):
        raise Exception('Matrices are not aligned')
    x = shared(np.asmatrix(matrix1, 'float32'))
    y = shared(np.asmatrix(matrix2, 'float32'))
    z = T.add(x, y)
    f = function([], sandbox.cuda.basic_ops.gpu_from_host(z))
    return f()
def free_energy_given_v(self, v):
    sigmoid_arg = self._mean_h_given_v(v)
    hterm = tensor.sum(
        tensor.nnet.softplus(sigmoid_arg),
        axis=range(1, sigmoid_arg.ndim))
    return tensor.add(
        0.5 * tensor.sum(
            self.usable_beta() * (v ** 2),
            axis=range(1, v.ndim)),
        -hterm)
def free_energy_given_v(self, v):
    """
    .. todo::

        WRITEME
    """
    sigmoid_arg = self.input_to_h_from_v(v)
    return tensor.add(
        0.5 * (self.B * (v ** 2)).sum(axis=1),
        -tensor.nnet.softplus(sigmoid_arg).sum(axis=1))