def forward_one(x, target, hidden, prev_c, train_flag):
    # make input window vector
    distance = window // 2
    char_vecs = list()
    x = list(x)
    for i in range(distance):
        x.append('</s>')
        x.insert(0, '<s>')
    for i in range(-distance + 1, distance + 2):
        char = x[target + i]
        char_id = char2id[char]
        char_vec = model.embed(get_onehot(char_id))
        char_vecs.append(char_vec)
    concat = F.concat(tuple(char_vecs))
    # apply dropout to the window embedding before mixing in the hidden state
    dropout_concat = F.dropout(concat, ratio=dropout_rate, train=train_flag)
    concat = F.concat((dropout_concat, hidden))
    i_gate = F.sigmoid(model.i_gate(concat))
    f_gate = F.sigmoid(model.f_gate(concat))
    o_gate = F.sigmoid(model.o_gate(concat))
    concat = F.concat((hidden, i_gate, f_gate, o_gate))
    prev_c, hidden = F.lstm(prev_c, concat)
    output = model.output(hidden)
    dist = F.softmax(output)
    return dist
def __call__(self, x):
    if self.h is None:
        self.h = variable.Variable(
            self.xp.zeros(self.state_size, dtype=x[0].data.dtype),
            volatile='auto')
    # update gate z
    zz = 0.
    for nth in range(0, len(self.in_channels)):
        zz += getattr(self, 'x_z' + str(nth))(x[nth])
    zz += self.h_z(self.h)
    zz = F.sigmoid(zz)
    # reset gate r
    rr = 0.
    for nth in range(0, len(self.in_channels)):
        rr += getattr(self, 'x_r' + str(nth))(x[nth])
    rr += self.h_r(self.h)
    rr = F.sigmoid(rr)
    # candidate hidden state
    hh = 0.
    for nth in range(0, len(self.in_channels)):
        hh += getattr(self, 'x_chr' + str(nth))(x[nth])
    hh += self.h_chr(rr * self.h)
    hh = F.tanh(hh)
    y = (1 - zz) * self.h + zz * hh
    self.h = y
    return y
def forward_eye_states(self, x_batch_curr, y_batch_curr, volatile):
    current_sample = Variable(x_batch_curr, volatile=volatile)
    y_batch_curr = np.asarray(y_batch_curr).reshape(32, -1)
    current_output = Variable(y_batch_curr, volatile=volatile)
    h1_current = F.sigmoid(self.model_to_use.x_h1(current_sample))
    h2_current = F.sigmoid(self.model_to_use.h1_h2(h1_current))
    h3_current = F.sigmoid(self.model_to_use.h2_h3(h2_current))
    h4_current = F.sigmoid(self.model_to_use.h3_h4(h3_current))
    h4 = h4_current
    y = self.model_to_use.h4_y(h4)
    y.data = y.data.reshape(32, -1)
    loss = F.sigmoid_cross_entropy(y, current_output)
    current_output.data = np.squeeze(current_output.data)
    accuracy = F.accuracy(y, current_output)
    return accuracy, loss, y
def forward_one(x, target, label, hidden_vec, prev_c):
    # make input window vector
    distance = window // 2
    char_vecs = list()
    x = list(x)
    for i in range(distance):
        x.append('</s>')
        x.insert(0, '<s>')
    for i in range(-distance, distance + 1):
        char = x[target + i]
        char_id = char2id[char]
        char_vec = model.embed(get_onehot(char_id))
        char_vecs.append(char_vec)
    concat = F.concat(tuple(char_vecs))
    concat = F.concat((concat, hidden_vec))
    i_gate = F.sigmoid(model.i_gate(concat))
    f_gate = F.sigmoid(model.f_gate(concat))
    o_gate = F.sigmoid(model.o_gate(concat))
    concat = F.concat((hidden_vec, i_gate, f_gate, o_gate))
    prev_c, hidden_vec = F.lstm(prev_c, concat)
    pred = F.softmax(model.output(hidden_vec))
    correct = get_onehot(label)
    return np.argmax(pred.data), F.softmax_cross_entropy(pred, correct)
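# A minimal, self-contained sketch (not from the snippets above) of how
# chainer.functions.lstm consumes its inputs: the second argument packs the
# a/i/f/o gate pre-activations along the channel axis, and F.lstm applies
# tanh/sigmoid to them internally, so feeding already-sigmoided gates, as the
# snippets above do, squashes them twice.
import numpy as np
import chainer.functions as F

batch, units = 2, 3
c_prev = np.zeros((batch, units), dtype=np.float32)
x = np.random.randn(batch, 4 * units).astype(np.float32)  # a, i, f, o stacked
c, h = F.lstm(c_prev, x)
print(h.shape)  # (2, 3)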
def forward_one_step(self, h, x, computeOutput=True):
    h = F.sigmoid(self.model.x_to_h(x) + self.model.h_to_h(h))
    if computeOutput:
        y = F.sigmoid(self.model.h_to_y(h))
        return h, y
    else:
        return h
def _propagate(self, Y, dropout=0.):
    blstm = self.blstm_layer(Y, dropout=dropout)
    relu_1 = F.clipped_relu(self.relu_1(blstm, dropout=dropout))
    relu_2 = F.clipped_relu(self.relu_2(relu_1, dropout=dropout))
    N_mask = F.sigmoid(self.noise_mask_estimate(relu_2))
    X_mask = F.sigmoid(self.speech_mask_estimate(relu_2))
    return N_mask, X_mask
def check_forward(self, x_data, use_cudnn=True):
    x = chainer.Variable(x_data)
    y = functions.sigmoid(x, use_cudnn=use_cudnn)
    self.assertEqual(y.data.dtype, numpy.float32)
    y_expect = functions.sigmoid(chainer.Variable(self.x))
    gradient_check.assert_allclose(y_expect.data, y.data)
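# A standalone sanity check in the same spirit as the test above (assumed
# setup, not part of the original test class): F.sigmoid should match the
# closed form 1 / (1 + exp(-x)) elementwise.
import numpy as np
import chainer.functions as F

x = np.linspace(-5, 5, 11).astype(np.float32)
y = F.sigmoid(x).data
np.testing.assert_allclose(y, 1.0 / (1.0 + np.exp(-x)), rtol=1e-5)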
def __call__(self, s):
    accum_loss = None
    _, k = self.embed.W.data.shape
    h = Variable(np.zeros((1, k), dtype=np.float32))
    c = Variable(np.zeros((1, k), dtype=np.float32))
    s_length = len(s)
    for i in range(s_length):
        w1 = s[i]
        w2 = s[i + 1] if i < s_length - 1 else self.eos_id
        x_k = self.embed(Variable(np.array([w1], dtype=np.int32)))
        tx = Variable(np.array([w2], dtype=np.int32))
        # block input
        z0 = self.Wz(x_k) + self.Rz(F.dropout(h))
        z1 = F.tanh(z0)
        # input gate
        i0 = self.Wi(x_k) + self.Ri(F.dropout(h))
        i1 = F.sigmoid(i0)
        # forget gate
        f0 = self.Wf(x_k) + self.Rf(F.dropout(h))
        f1 = F.sigmoid(f0)
        c = i1 * z1 + f1 * c
        # output gate
        o0 = self.Wo(x_k) + self.Ro(F.dropout(h))
        o1 = F.sigmoid(o0)
        y = o1 * F.tanh(c)
        h = y
        loss = F.softmax_cross_entropy(self.W(y), tx)
        accum_loss = loss if accum_loss is None else accum_loss + loss
    return accum_loss
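# The loop above spells out the standard LSTM gate equations by hand; a
# sketch of the same recurrence using the built-in link (sizes are
# assumptions, and the recurrent dropout on h is omitted):
import numpy as np
import chainer.links as L

lstm = L.LSTM(in_size=8, out_size=8)  # keeps c and h as internal state
h = lstm(np.random.randn(1, 8).astype(np.float32))
print(h.shape)  # (1, 8)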
def tag(self, x, z, test=True):
    a, b, h = self.forward(x, z, test=test)
    tag_a = F.sigmoid(self.out_a_tag(a))
    tag_b = F.sigmoid(self.out_b_tag(b))
    tag = F.sigmoid(self.out_tag(h))
    tag = tag * 0.8 + tag_a * 0.1 + tag_b * 0.1
    return tag
def forward(self, x_data):
    x = Variable(_as_mat(x_data))
    t = Variable(_as_mat(x_data))
    x = F.dropout(x)
    h = F.sigmoid(self.encoder(x))
    y = F.sigmoid(self.decoder(h))
    loss = F.mean_squared_error(y, t)
    return loss
def forward(self, x_data, train=True):
    x = Variable(x_data)
    t = Variable(x_data)
    if train:
        x = F.dropout(x)
    h = F.sigmoid(self.encoder(x))
    y = F.sigmoid(self.decoder(h))
    return F.mean_squared_error(y, t)
def forward(x_data, y_data):
    x = chainer.Variable(x_data)
    y = chainer.Variable(y_data)
    X1 = model.l1(x)
    out1 = F.sigmoid(X1)
    X2 = model.l2(out1)
    out2 = F.sigmoid(X2)
    return F.mean_squared_error(out2, y), out2
def decode(self, x, layer=None, train=False):
    if not train or layer == 2:
        x = F.sigmoid(self.model.dec2(x))
    if not train or layer == 1:
        x = F.sigmoid(self.model.dec1(x))
    return x
def __call__(self, x):
    h1 = F.sigmoid(F.average_pooling_2d(self.conv1(x), 2))
    h2 = F.sigmoid(F.average_pooling_2d(self.conv2(h1), 2))
    h3 = self.conv3(h2)
    h4 = F.tanh(self.l1(h3))
    p = self.l2(h4)
    return p
def forward(self, x_data):
    x = Variable(x_data)
    x = F.dropout(x)
    y = F.sigmoid(self.encoder(x))
    y_hat = F.sigmoid(self.decoder(y))
    loss = F.mean_squared_error(y_hat, x)
    return loss
def check_forward(self, x_data, use_cudnn=True):
    x = chainer.Variable(x_data)
    y = functions.sigmoid(x, use_cudnn=use_cudnn)
    self.assertEqual(y.data.dtype, self.dtype)
    y_expect = functions.sigmoid(chainer.Variable(self.x))
    testing.assert_allclose(
        y_expect.data, y.data, **self.check_forward_options)
def predict(self, x_data, train=False):
    x = chainer.Variable(x_data)
    h = F.sigmoid(self.encode1(x))
    h = F.sigmoid(self.encode2(h))
    h = F.sigmoid(self.decode1(h))
    y = F.sigmoid(self.decode2(h))
    return y.data
def __call__(self, x):
    f1 = F.sigmoid(self.beta1)
    f2 = F.sigmoid(self.beta2)
    # self.m = f1 * self.m + (1 - f1) * x
    # self.v = f2 * self.v + (1 - f2) * x**2
    self.m = self.beta1 * self.m + (1 - self.beta1) * x
    self.v = self.beta2 * self.v + (1 - self.beta2) * x**2
    g = 1e-3 * self.m / F.sqrt(self.v + 1e-8)
    return g
def check_forward(self, x_data, use_cudnn='always'):
    x = chainer.Variable(x_data)
    with chainer.using_config('use_cudnn', use_cudnn):
        y = functions.sigmoid(x)
    self.assertEqual(y.data.dtype, self.dtype)
    y_expect = functions.sigmoid(chainer.Variable(self.x))
    testing.assert_allclose(
        y_expect.data, y.data, **self.check_forward_options)
def forward(x_data):
    x = Variable(x_data)
    h1 = F.sigmoid(model.l1(x))
    h2 = F.sigmoid(model.l2(h1))
    y2 = softmax(model.l3(h2))
    return y2
def forward(self, x_data, y_data, dropout, train=True):
    x, t = chainer.Variable(x_data), chainer.Variable(y_data)
    h1 = F.dropout(F.sigmoid(self.model.l1(x)), ratio=dropout, train=train)
    h2 = F.dropout(F.sigmoid(self.model.l2(h1)), ratio=dropout, train=train)
    # softmax and accuracy for discrimination, mse for regression
    y = F.dropout(self.model.l3(h2), ratio=dropout, train=train)
    return F.mean_squared_error(y, t), t.data, y.data, y_data
def forward(self, x_data, t_data, train=True):
    x = chainer.Variable(x_data)
    t = chainer.Variable(t_data)
    h = F.sigmoid(self.encode1(x))
    h = F.sigmoid(self.encode2(h))
    h = F.sigmoid(self.decode1(h))
    y = F.sigmoid(self.decode2(h))
    return F.mean_squared_error(y, t)
def predict(self, x_data, y_data, train=False):
    x = chainer.Variable(x_data)
    h1 = F.sigmoid(self.fc1(x))
    h2 = F.sigmoid(self.fc2(h1))
    y = self.fc3(h2)  # should the final layer be passed through softmax?
    return y
def __call__(self, d_x_gen, d_x=None):
    # TODO: reverse trick
    bs_d_x_gen = d_x_gen.shape[0]
    if d_x is not None:
        bs_d_x = d_x.shape[0]
        loss = F.sum(F.log(F.sigmoid(d_x))) / bs_d_x \
            + F.sum(F.log(1 - F.sigmoid(d_x_gen))) / bs_d_x_gen
        return -loss  # to minimize
    else:
        loss = F.sum(F.log(1 - F.sigmoid(d_x_gen))) / bs_d_x_gen
        return loss
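# A numerically safer variant of the discriminator loss above (a sketch, not
# the original code): log(sigmoid(v)) == -softplus(-v) and
# log(1 - sigmoid(v)) == -softplus(v), which avoids log(0) for large |v|.
import numpy as np
import chainer.functions as F

def discriminator_loss(d_x, d_x_gen):
    # equals -(E[log D(x)] + E[log(1 - D(G(z)))]) from the snippet above
    return F.sum(F.softplus(-d_x)) / d_x.shape[0] \
        + F.sum(F.softplus(d_x_gen)) / d_x_gen.shape[0]

loss = discriminator_loss(
    np.random.randn(8, 1).astype(np.float32),
    np.random.randn(8, 1).astype(np.float32))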
def forward_one(x, target, hidden, prev_c, model):
    # make input window vector
    distance = window // 2
    char_vecs = list()
    char_type_vecs = list()
    x = list(x)
    for i in range(distance):
        x.append('</s>')
        x.append('</s>')
        x.insert(0, '<s>')
        x.insert(0, '<s>')
    # character unigram and bigram embeddings
    for i in range(-distance, distance + 1):
        char = x[target + 2 + i]
        try:
            char_id = char2id[char]
        except KeyError:
            char_id = char2id['UNK']
        char_vec = model.embed(get_onehot(char_id))
        char_vecs.append(char_vec)
        bi_gram = x[target + 2 + i] + x[target + 2 + i + 1]
        try:
            bi_gram_id = char2id[bi_gram]
        except KeyError:
            bi_gram_id = char2id['UNK']
        bi_gram_char_vec = model.embed(get_onehot(bi_gram_id))
        char_vecs.append(bi_gram_char_vec)
    char_concat = F.concat(tuple(char_vecs))
    # character-type unigram and bigram embeddings
    for i in range(-distance, distance + 1):
        char = x[target + 2 + i]
        pre_char = x[target + 2 + i + 1]
        char_type = make_char_type(char)
        pre_char_type = make_char_type(pre_char)
        bi_gram_type = pre_char_type + char_type
        char_type_id = char_type2id[char_type]
        bigram_type_id = char_type2id[bi_gram_type]
        char_type_vec = model.char_type_embed(get_onehot(char_type_id))
        bigram_type_vec = model.char_type_embed(get_onehot(bigram_type_id))
        char_type_vecs.append(char_type_vec)
        char_type_vecs.append(bigram_type_vec)
    char_type_concat = F.concat(tuple(char_type_vecs))
    concat = F.concat((char_concat, char_type_concat))
    concat = F.concat((concat, hidden))
    i_gate = F.sigmoid(model.i_gate(concat))
    f_gate = F.sigmoid(model.f_gate(concat))
    o_gate = F.sigmoid(model.o_gate(concat))
    concat = F.concat((hidden, i_gate, f_gate, o_gate))
    prev_c, hidden = F.lstm(prev_c, concat)
    output = model.output(hidden)
    dist = F.softmax(output)
    return np.argmax(dist.data)
def _extract(self, inputs, layername):
    if layername == 'prob':
        h = self._forward(inputs, layername='conv6_4')
        h = average_pooling_2d(h, ksize=7)
        y = sigmoid(h)
        return y.data
    elif layername == 'encode1neuron':
        h = self._forward(inputs, layername='encode1')
        y = sigmoid(h)
        return y.data
    else:
        y = self._forward(inputs, layername)
        return y.data
def forward_one_step(model, x_data, u_io, u_fh, u_sh,
                     tau_io, tau_fh, tau_sh, train=True):
    # the original MTRNN has only sigmoid activation functions
    x = chainer.Variable(x_data)
    fh = F.sigmoid(u_fh)
    sh = F.sigmoid(u_sh)
    y = F.sigmoid(u_io)
    u_io2 = (1 - 1 / tau_io) * u_io + (model.fh_to_y(fh)) / tau_io
    u_fh2 = (1 - 1 / tau_fh) * u_fh \
        + (model.x_to_fh(x) + model.fh_to_fh(fh) + model.sh_to_fh(sh)) / tau_fh
    u_sh2 = (1 - 1 / tau_sh) * u_sh \
        + (model.fh_to_sh(fh) + model.sh_to_sh(sh)) / tau_sh
    return u_io2, u_fh2, u_sh2, y
def _get_region_boxes(self, output, img_W, img_H):
    conf_thresh = 0.1
    B, C, H, W = output.shape
    assert C == 19 + self.n_class
    det_confs = F.sigmoid(output[:, 18]).data
    cls_conf = F.softmax(output[:, 19:19 + self.n_class]).data
    rpoints = output[:, :18].reshape(B, 9, 2, H, W)
    rpoints0 = F.sigmoid(rpoints[:, 0])
    rpoints = F.concat((rpoints0[:, None], rpoints[:, 1:]), axis=1)
    points_img = rpoints_to_points(rpoints.data)
    cls_max_ids = self.xp.argmax(cls_conf, axis=1)
    # cls_max_confs = self.xp.max(cls_conf, axis=1)
    points = []
    labels = []
    scores = []
    for b in range(B):
        point = []
        label = []
        score = []
        for cy in range(H):
            for cx in range(W):
                # only_objectness == True
                det_conf = det_confs[b, cy, cx]
                conf = det_conf
                if conf > conf_thresh:
                    # cls_max_conf = cls_max_confs[b, cy, cx]
                    cls_max_id = cls_max_ids[b, cy, cx]
                    pnt = self.xp.zeros((9, 2), dtype=np.float32)
                    pnt[:, 0] = points_img[b, :, 0, cy, cx] * img_W
                    pnt[:, 1] = points_img[b, :, 1, cy, cx] * img_H
                    # TODO: logic when only_objectness == False
                    point.append(pnt)
                    label.append(cls_max_id)
                    score.append(det_conf)
        if len(point) == 0:
            point = np.zeros((0, 9, 2), dtype=np.float32)
        points.append(self.xp.array(point, dtype=np.float32))
        labels.append(self.xp.array(label, dtype=np.int32))
        scores.append(self.xp.array(score, dtype=np.float32))
    return points, labels, scores
def forward(x_data):
    x = Variable(x_data)
    # h = F.max_pooling_2d(F.relu(model.l1(x)), ksize=5, stride=2, pad=2)
    # h = F.max_pooling_2d(F.relu(model.l2(h)))
    h1 = F.sigmoid(model.l1(x))
    h2 = F.sigmoid(model.l2(h1))
    y2 = softmax(model.l3(h2))
    return y2
def forward(x_data, y_data, train=True):
    x = chainer.Variable(
        x_data.reshape(batchsize, 400).astype(numpy.float32), volatile=False)
    t = chainer.Variable(y_data.astype(numpy.int32), volatile=False)
    h1 = F.dropout(F.sigmoid(model.l1(x)), train=train)
    h2 = F.dropout(F.sigmoid(model.l2(h1)), train=train)
    y = F.dropout(F.sigmoid(model.l3(h2)), train=train)
    return F.softmax_cross_entropy(y, t), F.accuracy(y, t)
def decode(self, x):
    h = F.relu(self.l5(x))
    h = F.reshape(h, (-1, 16, 17, 17))
    h = F.relu(self.deconv6(h))
    h = F.relu(self.deconv7(h))
    return F.sigmoid(self.deconv8(h))
def predict(self, x):
    h1 = F.sigmoid(self.l1(x))
    h2 = F.sigmoid(self.l2(h1))
    h3 = self.l3(h2)
    return h3
def _predict_heads_attn(self, sent_states, subword_embeds, mask, sub_lengths,
                        batch_stats, extract_attn=False, sorted_heads=None):
    """For each token in the sentence predict which token in the sentence
    is its head."""
    # sub_lengths is the mask for morph embeddings -- it refers to the
    # subword lengths; the start and end word are masked
    batch_size, max_sent_len, col_lengths = batch_stats
    calc_loss = sorted_heads is not None
    # In order to predict which head is most probable for a given word,
    # for each token in the sentence we get a vector representation for
    # that token as a head, and another for that token as a dependent.
    # The idea here is that we only need to calculate these once and then
    # reuse them to get all combinations.
    # ------------------------------------------------------
    # In g(a_j, a_i) we note that we can precompute the matrix
    # multiplications for each word; since we consider all possible heads,
    # we can pre-calculate U * a_j for all a_j.
    # head activations for each token lstm activation
    # bs * max_sent x mlp_arc_units
    h_arc = self.H_arc(sent_states)
    # transform results to be indexable by sentence index for the upcoming
    # for loop; h_arc is now max_sent x bs x mlp_arc_units
    h_arc = F.reshape(h_arc, (-1, batch_size, self.mlp_arc_units))
    # bs * max_sent x mlp_arc_units
    d_arc = self.D_arc(sent_states)
    # max_sent x bs x mlp_arc_units
    d_arc = F.reshape(d_arc, (-1, batch_size, self.mlp_arc_units))
    # the values to use to mask softmax for head prediction;
    # e ^ -100 is ~ zero (can be changed from self.MIN_PAD)
    mask_vals = Variable(
        self.xp.full((batch_size, max_sent_len),
                     self.MIN_PAD, dtype=self.xp.float32))
    # reshape sub_lengths for attention computation
    # max_sent_len x bs x self.unit_mult*self.encoder.num_units
    sub_lengths = F.swapaxes(sub_lengths, axis1=0, axis2=1)
    # subword_embeds shape is bs x max_sen x max_sub_len x units_dim;
    # reshape subword embeds to compute attention
    # max_sent x bs x units_dim*max_sub_len
    subword_embeds = F.reshape(
        subword_embeds, (-1, batch_size, self.units_dim * self.max_sub_len))
    sent_arcs = []
    sent_attn_vectors = []
    sent_h_heads = []
    # we start from 1 because we don't consider root
    for i in range(1, max_sent_len):
        num_active = col_lengths[i]
        # if we are calculating loss create truth variables
        if calc_loss:
            # i-1 because sentence has root appended to beginning
            gold_heads = sorted_heads[i - 1]
        # ================== HEAD PREDICTION ======================
        # NOTE Because some sentences may be shorter - only num_active of
        # the batch have valid activations for this token. If in softmax
        # we didn't limit arcs to [:num_active] we would need to replace
        # embeddings that are out of sentence range with zeros - because
        # otherwise when broadcasting and summing we will modify valid
        # batch activations for earlier tokens of the sentence.
        # ====================== Code for padding ==========================
        # invalid_pad = ((0, int(batch_size - num_active)), (0, 0))
        # d_arc_pad = F.pad(d_arc[i][:num_active],
        #                   invalid_pad, 'constant', constant_values=0.)
        # ==================================================================
        # h_i is the current word and h_j is the candidate head
        h_i = d_arc[i]
        # we compute the attention for every possible head
        h_heads = []
        attn_vectors = []
        # now, we start from 0 because we consider ROOT as head
        for j in range(max_sent_len):
            # f_j is the morph features of h_j
            f_j = subword_embeds[j]
            h_j = h_arc[j]
            # compute the attention vector
            k = self.V_attn(f_j, h_i)
            # or, another option:
            # k = self.V_attn(f_j, F.concat((h_i, h_j), axis=1))
            # attention mask, shape is the same as k
            attn_mask = Variable(
                self.xp.full(k.shape, self.MIN_PAD, dtype=self.xp.float32))
            # NOTE that we also mask the start and end symbol of the
            # subword unit sequence
            cond = sub_lengths[j]
            k = F.where(cond, k, attn_mask)
            k = F.softmax(k, axis=1)
            attn_vectors.append(k)
            # reshape k and f_j to compute m_j
            k = F.reshape(k, (-1, 1))
            f_j = F.reshape(f_j, (-1, self.units_dim))
            # compute m_j
            m_j = f_j * k.data
            m_j = F.reshape(
                m_j, (batch_size, self.max_sub_len, self.units_dim))
            m_j = F.sum(m_j, axis=1)
            # compute gating function
            g = F.sigmoid(self.W_glob(h_j) + self.W_loc(h_i))
            z_j = g * h_j + (1 - g) * m_j
            h_heads.append(z_j)
        # sent_attn_vectors stores the attention vectors for each dependent
        # word in the batch: max_sent_len - 1 (because we don't consider
        # root) x max_sent_len x bs x num of morph features
        sent_attn_vectors.append(attn_vectors)
        h_i = self.W_dependent(h_i)
        h_heads = self.W_head(
            F.reshape(F.stack(h_heads, axis=0), (-1, self.mlp_arc_units)))
        h_heads = F.reshape(h_heads, (-1, batch_size, self.mlp_arc_units))
        sent_h_heads.append(h_heads)
        a_u, a_w = F.broadcast(h_heads, h_i)
        arc_logit = F.reshape(F.tanh(a_u + a_w), (-1, self.mlp_arc_units))
        if self.arc_dropout > 0.:
            arc_logit = F.dropout(arc_logit, ratio=self.arc_dropout)
        arc_logit = self.vT(arc_logit)
        arcs = F.swapaxes(F.reshape(arc_logit, (-1, batch_size)), 0, 1)
        arcs = F.where(mask, arcs, mask_vals)
        # Calculate losses
        if calc_loss:
            # we don't want to average out over seen words yet
            # NOTE: do not use ignore_label - in gpu mode gold_heads gets
            # mutated, and furthermore we would need to have padded the
            # invalid state of d_arc[i] with zeros before broadcasting
            # (see NOTE above)
            head_loss = F.sum(
                F.softmax_cross_entropy(arcs[:num_active],
                                        gold_heads[:num_active],
                                        reduce='no'))
            self.loss += head_loss
        sent_arcs.append(F.reshape(arcs, (batch_size, -1, 1)))
    arcs = F.concat(sent_arcs, axis=2)
    # sent_h_heads stores the morphological representations of each possible
    # head, for each dependent word:
    # max_sent_len - 1 (num of dep) x max_sent_len (num of possible heads)
    # x bs x mlp_arc_units
    # in other words, each word has its own view/representation of its
    # possible heads
    sent_h_heads = F.stack(sent_h_heads, axis=0)
    return arcs, sent_h_heads, sent_attn_vectors
def __call__(self, x):
    return F.sigmoid(x)
def __call__(self, x):
    x = add_zero_pad(x, self.kernel // 2, 3)
    A = F.tanh(self.W(x))
    B = F.sigmoid(self.V(x))
    return A * B
def fwd(self, x):
    h1 = F.sigmoid(self.l1(x))
    h2 = self.l2(h1)
    # h3 = F.softmax(h2)  -- no softmax needed at the end of the forward pass
    return h2
def mlp_forward(self, x):
    out1 = self.l1(x)
    out2 = F.sigmoid(out1)
    return F.sigmoid(self.l2(out2))
def __call__(self, x_target):
    # x_target: chainer.Variable of shape = [N, 3, H, W]
    # There might be dimension mismatch due to uneven down/up-sampling
    H, W = x_target.shape[2:]
    normalizer = lambda z: z
    h = x_target
    h = self.activation(normalizer(self.c1(h)))
    h = self.activation(normalizer(self.c1b(h)))
    h_c1b = h
    h = self.activation(normalizer(self.c2(h)))
    h = self.activation(normalizer(self.c2b(h)))
    h_c2b = h
    h = self.activation(normalizer(self.c3(h)))
    h = self.activation(normalizer(self.c3b(h)))
    h_c3b = h
    h = self.activation(normalizer(self.c4(h)))
    h = self.activation(normalizer(self.c4b(h)))
    h_c4b = h
    h = self.activation(normalizer(self.c5(h)))
    h = self.activation(normalizer(self.c5b(h)))
    h_c5b = h
    h = self.activation(normalizer(self.c6(h)))
    h = self.activation(normalizer(self.c6b(h)))
    h_c6b = h
    h = self.activation(normalizer(self.c7(h)))
    h = self.activation(normalizer(self.c7b(h)))
    h = self.activation(normalizer(self.dc7(h)))
    # There might be dimension mismatch due to uneven down/up-sampling.
    # Resize by bilinear interpolation
    # (by nearest neighbor sampling in the original implementation).
    h = resize_like(h, h_c6b)
    h = F.concat([h, h_c6b], axis=1)
    h = self.activation(normalizer(self.idc7(h)))
    h = self.activation(normalizer(self.dc6(h)))
    h = resize_like(h, h_c5b)
    h = F.concat([h, h_c5b], axis=1)
    h = self.activation(normalizer(self.idc6(h)))
    h = self.activation(normalizer(self.dc5(h)))
    h = resize_like(h, h_c4b)
    h = F.concat([h, h_c4b], axis=1)
    h = self.activation(normalizer(self.idc5(h)))
    h = self.activation(normalizer(self.dc4(h)))
    h = F.concat([h, h_c3b], axis=1)
    h = self.activation(normalizer(self.idc4(h)))
    disp4 = DISP_SCALING * F.sigmoid(self.dispout4(h)) + MIN_DISP
    disp4_up = F.resize_images(disp4, (H // 4, W // 4))
    h = self.activation(normalizer(self.dc3(h)))
    h = F.concat([h, h_c2b, disp4_up], axis=1)
    h = self.activation(normalizer(self.idc3(h)))
    disp3 = DISP_SCALING * F.sigmoid(self.dispout3(h)) + MIN_DISP
    disp3_up = F.resize_images(disp3, (H // 2, W // 2))
    h = self.activation(normalizer(self.dc2(h)))
    h = F.concat([h, h_c1b, disp3_up], axis=1)
    h = self.activation(normalizer(self.idc2(h)))
    disp2 = DISP_SCALING * F.sigmoid(self.dispout2(h)) + MIN_DISP
    disp2_up = F.resize_images(disp2, (H, W))
    h = self.activation(normalizer(self.dc1(h)))
    h = F.concat([h, disp2_up], axis=1)
    h = self.activation(normalizer(self.idc1(h)))
    disp1 = DISP_SCALING * F.sigmoid(self.dispout1(h)) + MIN_DISP
    return [disp1, disp2, disp3, disp4]
def forward(self, x):
    h = F.sigmoid(self.l1(x))
    h = self.l2(h)
    return h
def fwd(self, x):
    # apply the sigmoid activation function to obtain the hidden vector
    h1 = F.sigmoid(self.l1(x))
    # TODO: is an activation function needed here?
    h2 = self.l2(h1)
    return h2
def forward(self):
    x = chainer.Variable(self.x)
    return functions.sigmoid(x)
def __call__(self, x, y):
    xy = x * y
    a = self.a3(self.a0(x) + self.a1(y) + self.a2(xy))
    b = self.b3(self.b0(x) + self.b1(y) + self.b2(xy))
    return b + self.a4(F.sigmoid(a))
def __call__(self, x):
    a, b = F.split_axis(x, 2, axis=1)
    h = a * F.sigmoid(b)
    return h
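# A quick check of the gated-linear-unit pattern above (a sketch with dummy
# data): the channel axis is split in half, and one half gates the other
# through a sigmoid.
import numpy as np
import chainer.functions as F

x = np.random.randn(4, 8).astype(np.float32)  # batch of 4, 8 channels
a, b = F.split_axis(x, 2, axis=1)             # two (4, 4) halves
h = a * F.sigmoid(b)
print(h.shape)  # (4, 4)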
def __call__(self, fbs, ns):
    """Compute attention.

    :param fbs: list of forward/backward encoder hidden vectors,
        e.g. shapes (4, 1000), (3, 1000), (2, 1000)
    :param ns: hidden vectors emitted by the decoder
    :return: weighted averages of the forward and backward encoder
        hidden vectors
    """
    # list to record the attention weights
    ws = []
    # compute a weight from each encoder hidden vector and the decoder
    # hidden vector
    for fb, n in zip(fbs, ns):
        # tile the decoder state to match the number of encoder states
        n_s = self.ARR.tile(n, (fb.data.shape[0], 1))
        w = F.tanh(
            F.dropout(self.fbh(fb), ratio=self.USE_DROPOUT)
            + F.dropout(self.nh(n_s), ratio=self.USE_DROPOUT))
        # normalize with a softmax
        w = F.exp(F.dropout(self.hw(w), ratio=self.USE_DROPOUT))
        # record the computed weight
        ws.append(w / F.sum(w).data)
    # initialize the weighted-average vectors to output
    att_fb = self.ARR.empty((0, self.hidden_size), dtype=np.float32)
    if self.flag_local == 0:
        # global attention
        for fb, w in zip(fbs, ws):
            # accumulate weight * encoder hidden vector
            att_fb += F.reshape(F.matmul(fb, w), (-1, self.hidden_size))
    else:
        # local attention with window size D
        D = self.local_window
        for fb, w, n in zip(fbs, ws, ns):
            # predicted center position of the local window
            w_local_input = fb.data.shape[0] * F.sigmoid(
                F.dropout(self.tw(
                    F.tanh(
                        F.dropout(self.nt(F.reshape(n, (1, -1))),
                                  ratio=self.USE_DROPOUT))),
                          ratio=self.USE_DROPOUT))
            # Gaussian falloff around the predicted center
            w_local_output = self.ARR.array([
                self.ARR.exp(
                    -(float(s + 1) - float(w_local_input.data)) ** 2
                    / (((D / 2) ** 2) * 2))
                for s in range(fb.data.shape[0])
            ], dtype='float32')
            # reweight, then accumulate weight * encoder hidden vector
            w = w * F.reshape(w_local_output, (-1, 1))
            fb_w = F.reshape(F.matmul(F.transpose(w), fb), (1, -1))
            att_fb = F.concat((att_fb, fb_w), axis=0)
    return att_fb
def fwd(self, x):
    h1 = F.sigmoid(self.l1(x))
    h2 = self.l2(h1)
    return h2
def fwd(self, x):
    # wire up the layers
    mid = F.sigmoid(self.l1(x))
    result = self.l2(mid)
    return result
def activation(self, x):
    # return F.leaky_relu(x)
    return x * F.sigmoid(x)  # Swish activation function
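# The Swish activation above is x * sigmoid(x); a small numeric sketch
# checking it against the closed form x / (1 + exp(-x)).
import numpy as np
import chainer.functions as F

x = np.linspace(-4, 4, 9).astype(np.float32)
swish = (F.sigmoid(x) * x).data
np.testing.assert_allclose(swish, x / (1.0 + np.exp(-x)), rtol=1e-5)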
def __call__(self, x):
    image, steps = x
    h = self.image2hidden(image) * F.sigmoid(self.embed(steps))
    return self.hidden2out(h)
def __call__(self, encs, hiddens, batch_size, prev_image, num_masks,
             color_channels):
    """Learn through StatelessCDNA.

    Args:
        encs: An array of computed transformations
        hiddens: An array of hidden layers
        batch_size: Size of mini batches
        prev_image: The image to transform
        num_masks: Number of masks to apply
        color_channels: Output color channels
    Returns:
        transformed: A list of masks to apply on the previous image
    """
    logger = logging.getLogger(__name__)
    enc0, enc1, enc2, enc3, enc4, enc5, enc6 = encs
    hidden1, hidden2, hidden3, hidden4, hidden5, hidden6, hidden7 = hiddens
    img_height = prev_image.shape[2]
    img_width = prev_image.shape[3]

    # CDNA specific
    enc7 = self.enc7(enc6)
    enc7 = F.relu(enc7)
    transformed_list = list([F.sigmoid(enc7)])

    # CDNA specific:
    # predict kernels using a linear function of the last layer
    cdna_input = F.reshape(hidden5, (int(batch_size), -1))
    cdna_kerns = self.cdna_kerns(cdna_input)

    # Reshape and normalize
    # B x C x H x W => B x NUM_MASKS x 1 x H x W
    cdna_kerns = F.reshape(
        cdna_kerns,
        (int(batch_size), self.num_masks, 1, DNA_KERN_SIZE, DNA_KERN_SIZE))
    cdna_kerns = F.relu(cdna_kerns - RELU_SHIFT) + RELU_SHIFT
    norm_factor = F.sum(cdna_kerns, (2, 3, 4), keepdims=True)
    cdna_kerns = broadcasted_division(cdna_kerns, norm_factor)

    # Treat the color channel dimension as the batch dimension since the
    # same transformation is applied to each color channel.
    # Treat the batch dimension as the channel dimension so that
    # F.depthwise_convolution_2d can apply a different transformation to
    # each sample.
    cdna_kerns = F.reshape(
        cdna_kerns,
        (int(batch_size), self.num_masks, DNA_KERN_SIZE, DNA_KERN_SIZE))
    cdna_kerns = F.transpose(cdna_kerns, (1, 0, 2, 3))

    # Swap the batch and channel dimension.
    prev_image = F.transpose(prev_image, (1, 0, 2, 3))

    # Transform the image.
    transformed = F.depthwise_convolution_2d(
        prev_image, cdna_kerns, stride=(1, 1), pad=DNA_KERN_SIZE // 2)

    # Transpose the dimensions to where they belong.
    transformed = F.reshape(
        transformed,
        (color_channels, int(batch_size), self.num_masks,
         img_height, img_width))
    transformed = F.transpose(transformed, (2, 1, 0, 3, 4))
    transformed = F.split_axis(
        transformed, indices_or_sections=self.num_masks, axis=0)
    transformed = [F.squeeze(t, axis=0) for t in transformed]

    transformed_list += transformed
    return transformed_list, enc7
def __decode(x, layer=None, train=False):
    if not train or layer == 2:
        x = F.sigmoid(model.dec2(x))
    if not train or layer == 1:
        x = F.sigmoid(model.dec1(x))
    return x
def __call__(self, x):
    h1 = F.sigmoid(self.l1(x))
    h2 = F.sigmoid(self.l2(h1))
    return h2
def dec_forward(x_data, layer):
    x = chainer.Variable(x_data.astype(np.float32))
    if layer >= 2:
        x = F.sigmoid(model.dec2(x))
    y = model.dec1(x)
    return y
def __call__(self, x):
    out_plain = self.activate(self.plain(x))
    out_transform = F.sigmoid(self.transform(x))
    y = out_plain * out_transform + x * (1 - out_transform)
    return y
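# A minimal, self-contained version of the highway layer above (a sketch;
# the Linear links, sizes, and ReLU activation are assumptions, not the
# original model): the sigmoid "transform" gate T(x) blends the transformed
# path with the untouched input, y = H(x) * T(x) + x * (1 - T(x)).
# Chainer also ships chainer.links.Highway with the same shape-preserving
# behavior.
import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L

class Highway(chainer.Chain):
    def __init__(self, n_units):
        super(Highway, self).__init__()
        with self.init_scope():
            self.plain = L.Linear(n_units, n_units)
            self.transform = L.Linear(n_units, n_units)

    def __call__(self, x):
        out_plain = F.relu(self.plain(x))
        out_transform = F.sigmoid(self.transform(x))
        return out_plain * out_transform + x * (1 - out_transform)

layer = Highway(16)
y = layer(np.random.randn(4, 16).astype(np.float32))
print(y.shape)  # (4, 16)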
def f(x):
    y = functions.sigmoid(x)
    return y * y
def zoneout(self, U):
    if self._using_zoneout and chainer.config.train:
        return 1 - zoneout(functions.sigmoid(-U), self._zoneout)
    return functions.sigmoid(U)
# initialization and setup
model = MyAE()
optimizer = optimizers.SGD()
optimizer.setup(model)
x = Variable(np.array(x, dtype=np.float32))

# training
for a in range(10000):
    model.cleargrads()
    loss = model(x)
    loss.backward()
    optimizer.update()

# compressed representation, 6 dimensions
y = F.sigmoid(model.l1(x))
ans = y.data

# AE that compresses the 6 dimensions down to 2
class MyAE2(Chain):
    def __init__(self):
        super(MyAE2, self).__init__(
            # network
            l1=L.Linear(6, 2),
            l2=L.Linear(2, 6),
        )

    def __call__(self, x):
        # reconstruction loss
        out = self.fwd(x)
        return F.mean_squared_error(out, x)
def __call__(self, input_x, t):
    output = self.predictor(input_x)
    batch_size, _, grid_h, grid_w = output.shape
    self.seen += batch_size
    x, y, w, h, conf, prob = F.split_axis(
        F.reshape(output,
                  (batch_size, self.predictor.n_boxes,
                   self.predictor.n_classes + 5, grid_h, grid_w)),
        (1, 2, 3, 4, 5), axis=2)
    x = F.sigmoid(x)  # activation for x
    y = F.sigmoid(y)  # activation for y
    conf = F.sigmoid(conf)  # activation for conf
    prob = F.transpose(prob, (0, 2, 1, 3, 4))
    prob = F.softmax(prob)  # activation for probability

    # prepare the training targets:
    # train w and h towards 0 (so e^w and e^h approach 1, i.e. the assigned
    # bbox keeps the anchor's scale)
    tw = np.zeros(w.shape, dtype=np.float32)
    th = np.zeros(h.shape, dtype=np.float32)
    # train the activated x and y towards 0.5 (the cell center)
    tx = np.tile(0.5, x.shape).astype(np.float32)
    ty = np.tile(0.5, y.shape).astype(np.float32)

    # the box learning scale for cells without a center is 0.1 by default
    if self.seen < self.unstable_seen:
        box_learning_scale = np.tile(0.1, x.shape).astype(np.float32)
    else:
        box_learning_scale = np.tile(0, x.shape).astype(np.float32)

    # the confidence truth is 0 by default; anchors whose IoU exceeds the
    # threshold are not trained, and only the best box of a grid cell that
    # contains an object is pushed towards the true IoU
    tconf = np.zeros(conf.shape, dtype=np.float32)
    conf_learning_scale = np.tile(0.1, conf.shape).astype(np.float32)

    # anchors other than the best one are not trained
    # (squared error against themselves = 0)
    tprob = prob.data.copy()

    # compute the IoU of every bbox against the truth (batch-wise)
    x_shift = Variable(
        np.broadcast_to(np.arange(grid_w, dtype=np.float32), x.shape[1:]))
    y_shift = Variable(
        np.broadcast_to(
            np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1),
            y.shape[1:]))
    w_anchor = Variable(
        np.broadcast_to(
            np.reshape(np.array(self.anchors, dtype=np.float32)[:, 0],
                       (self.predictor.n_boxes, 1, 1, 1)), w.shape[1:]))
    h_anchor = Variable(
        np.broadcast_to(
            np.reshape(np.array(self.anchors, dtype=np.float32)[:, 1],
                       (self.predictor.n_boxes, 1, 1, 1)), h.shape[1:]))
    x_shift.to_gpu(), y_shift.to_gpu(), w_anchor.to_gpu(), h_anchor.to_gpu()
    best_ious = []
    for batch in range(batch_size):
        n_truth_boxes = len(t[batch])
        box_x = (x[batch] + x_shift) / grid_w
        box_y = (y[batch] + y_shift) / grid_h
        box_w = F.exp(w[batch]) * w_anchor / grid_w
        box_h = F.exp(h[batch]) * h_anchor / grid_h
        ious = []
        for truth_index in range(n_truth_boxes):
            truth_box_x = Variable(
                np.broadcast_to(
                    np.array(t[batch][truth_index]["x"], dtype=np.float32),
                    box_x.shape))
            truth_box_y = Variable(
                np.broadcast_to(
                    np.array(t[batch][truth_index]["y"], dtype=np.float32),
                    box_y.shape))
            truth_box_w = Variable(
                np.broadcast_to(
                    np.array(t[batch][truth_index]["w"], dtype=np.float32),
                    box_w.shape))
            truth_box_h = Variable(
                np.broadcast_to(
                    np.array(t[batch][truth_index]["h"], dtype=np.float32),
                    box_h.shape))
            truth_box_x.to_gpu(), truth_box_y.to_gpu()
            truth_box_w.to_gpu(), truth_box_h.to_gpu()
            ious.append(
                multi_box_iou(
                    Box(box_x, box_y, box_w, box_h),
                    Box(truth_box_x, truth_box_y, truth_box_w,
                        truth_box_h)).data.get())
        ious = np.array(ious)
        best_ious.append(np.max(ious, axis=0))
    best_ious = np.array(best_ious)

    # for anchors whose IoU is above the threshold, do not push conf down
    # to 0 (grids around the truth keep their conf as-is)
    tconf[best_ious > self.thresh] = \
        conf.data.get()[best_ious > self.thresh]
    conf_learning_scale[best_ious > self.thresh] = 0

    # only for anchor boxes that contain an object, individually correct
    # x, y, w, h, conf and prob
    abs_anchors = self.anchors / np.array([grid_w, grid_h])
    for batch in range(batch_size):
        for truth_box in t[batch]:
            truth_w = int(float(truth_box["x"]) * grid_w)
            truth_h = int(float(truth_box["y"]) * grid_h)
            truth_n = 0
            best_iou = 0.0
            for anchor_index, abs_anchor in enumerate(abs_anchors):
                iou = box_iou(
                    Box(0, 0, float(truth_box["w"]),
                        float(truth_box["h"])),
                    Box(0, 0, abs_anchor[0], abs_anchor[1]))
                if best_iou < iou:
                    best_iou = iou
                    truth_n = anchor_index

            # for the anchor that contains the object, move the center
            # towards the true coordinates instead of 0.5, move the scale
            # towards the true scale instead of 1, and set the learning
            # scale to 1
            box_learning_scale[batch, truth_n, :, truth_h, truth_w] = 1.0
            tx[batch, truth_n, :, truth_h, truth_w] = \
                float(truth_box["x"]) * grid_w - truth_w
            ty[batch, truth_n, :, truth_h, truth_w] = \
                float(truth_box["y"]) * grid_h - truth_h
            tw[batch, truth_n, :, truth_h, truth_w] = np.log(
                float(truth_box["w"]) / abs_anchors[truth_n][0])
            th[batch, truth_n, :, truth_h, truth_w] = np.log(
                float(truth_box["h"]) / abs_anchors[truth_n][1])
            tprob[batch, :, truth_n, truth_h, truth_w] = 0
            tprob[batch, int(truth_box["label"]), truth_n, truth_h,
                  truth_w] = 1

            # monitor the IoU
            full_truth_box = Box(float(truth_box["x"]),
                                 float(truth_box["y"]),
                                 float(truth_box["w"]),
                                 float(truth_box["h"]))
            predicted_box = Box(
                (x[batch][truth_n][0][truth_h][truth_w].data.get()
                 + truth_w) / grid_w,
                (y[batch][truth_n][0][truth_h][truth_w].data.get()
                 + truth_h) / grid_h,
                np.exp(w[batch][truth_n][0][truth_h][truth_w].data.get())
                * abs_anchors[truth_n][0],
                np.exp(h[batch][truth_n][0][truth_h][truth_w].data.get())
                * abs_anchors[truth_n][1])
            predicted_iou = box_iou(full_truth_box, predicted_box)
            tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou
            conf_learning_scale[batch, truth_n, :, truth_h, truth_w] = 10.0

        # debug prints
        maps = F.transpose(prob[batch], (2, 3, 1, 0)).data
        print("best confidences and best conditional probability and "
              "predicted class of each grid:")
        for i in range(grid_h):
            for j in range(grid_w):
                print("%2d" % (int(conf[batch, :, :, i, j].data.max()
                                   * 100)), end=" ")
            print("    ", end="")
            for j in range(grid_w):
                print("%2d" % (maps[i][j][int(
                    maps[i][j].max(axis=1).argmax())].argmax()), end=" ")
            print("    ", end="")
            for j in range(grid_w):
                print("%2d" % (maps[i][j][int(
                    maps[i][j].max(axis=1).argmax())].max() * 100),
                      end=" ")
            print()
        print("best default iou: %.2f   predicted iou: %.2f   "
              "confidence: %.2f   class: %s"
              % (best_iou, predicted_iou,
                 conf[batch][truth_n][0][truth_h][truth_w].data,
                 t[batch][0]["label"]))
        print("-------------------------------")
    print("seen = %d" % self.seen)

    # compute the loss
    tx, ty, tw, th, tconf, tprob = Variable(tx), Variable(ty), \
        Variable(tw), Variable(th), Variable(tconf), Variable(tprob)
    box_learning_scale, conf_learning_scale = \
        Variable(box_learning_scale), Variable(conf_learning_scale)
    tx.to_gpu(), ty.to_gpu(), tw.to_gpu(), th.to_gpu(), tconf.to_gpu(), \
        tprob.to_gpu()
    box_learning_scale.to_gpu()
    conf_learning_scale.to_gpu()

    x_loss = F.sum((tx - x) ** 2 * box_learning_scale) / 2
    y_loss = F.sum((ty - y) ** 2 * box_learning_scale) / 2
    w_loss = F.sum((tw - w) ** 2 * box_learning_scale) / 2
    h_loss = F.sum((th - h) ** 2 * box_learning_scale) / 2
    c_loss = F.sum((tconf - conf) ** 2 * conf_learning_scale) / 2
    p_loss = F.sum((tprob - prob) ** 2) / 2
    print("x_loss: %f  y_loss: %f  w_loss: %f  h_loss: %f  c_loss: %f  "
          "p_loss: %f"
          % (F.sum(x_loss).data, F.sum(y_loss).data, F.sum(w_loss).data,
             F.sum(h_loss).data, F.sum(c_loss).data, F.sum(p_loss).data))
    loss = x_loss + y_loss + w_loss + h_loss + c_loss + p_loss
    return loss
def __call__(self, x, extractor):
    h = F.relu(self.c0(F.concat([x, extractor])))
    h = F.sigmoid(self.c1(h))
    return h
def __call__(self, x):
    self.chi = F.concat((x, self.r))
    (self.nu, self.xi) = F.split_axis(self.l_dl(self.chi), [self.Y], 1)
    (self.kr, self.betar, self.kw, self.betaw,
     self.e, self.v, self.f, self.ga, self.gw, self.pi) = \
        F.split_axis(self.xi, self.xi_split_indices, 1)

    self.kr = F.reshape(self.kr, (self.R, self.W))  # R * W
    self.betar = 1 + F.softplus(self.betar)  # 1 * R
    # self.kw: 1 * W
    self.betaw = 1 + F.softplus(self.betaw)  # 1 * 1
    self.e = F.sigmoid(self.e)  # 1 * W
    # self.v : 1 * W
    self.f = F.sigmoid(self.f)  # 1 * R
    self.ga = F.sigmoid(self.ga)  # 1 * 1
    self.gw = F.sigmoid(self.gw)  # 1 * 1
    self.pi = F.softmax(
        F.reshape(self.pi, (self.R, 3)))  # R * 3 (softmax over 3)

    # self.wr : N * R
    self.psi_mat = 1 - F.broadcast_to(
        self.f, (self.N, self.R)) * self.wr  # N x R
    self.psi = F.prod(self.psi_mat, 1).reshape(self.N, 1)  # N x 1

    # self.ww, self.u : N * 1
    self.u = (self.u + self.ww - (self.u * self.ww)) * self.psi
    self.a = u2a(self.u.data)  # N * 1
    self.cw = C(self.M.data, self.kw.data, self.betaw.data)  # N * 1
    self.ww = F.matmul(
        F.matmul(self.a, self.ga) + F.matmul(self.cw, 1.0 - self.ga),
        self.gw)  # N * 1
    self.M = self.M * (xp.ones((self.N, self.W)).astype(xp.float32)
                       - F.matmul(self.ww, self.e)) \
        + F.matmul(self.ww, self.v)  # N * W

    if self.K > 0:
        self.p = (1.0 - F.matmul(
            Variable(xp.ones((self.N, 1)).astype(xp.float32)),
            F.reshape(F.sum(self.ww), (1, 1)))) * self.p + self.ww  # N * 1
        self.p.data = xp.sort(self.p.data, 0)
        self.p.data[0:-self.K] = 0.
        self.p.data[-self.K:] = self.p.data[-self.K:] / xp.sum(
            self.p.data[-self.K:])
        self.ww.data = xp.sort(self.ww.data, 0)
        self.ww.data[0:-self.K] = 0.
        self.ww.data[-self.K:] = self.ww[-self.K:].data / xp.sum(
            self.ww.data[-self.K:])
        self.wwrep = F.matmul(
            self.ww,
            Variable(xp.ones((1, self.N)).astype(xp.float32)))  # N * N
        self.ww_p_product = xp.zeros((self.N, self.N)).astype(xp.float32)
        self.ww_p_product[-self.K:, -self.K:] = F.matmul(
            self.ww[-self.K:, -self.K:],
            F.transpose(self.p[-self.K:, -self.K:])).data
        self.L = (1.0 - self.wwrep - F.transpose(self.wwrep)) * self.L \
            + self.ww_p_product  # N * N
        self.L = self.L * (xp.ones((self.N, self.N))
                           - xp.eye(self.N))  # force L[i,i] == 0
        self.L.data[self.L.data < 1 / self.K] = 0.
    else:
        self.p = (1.0 - F.matmul(
            Variable(xp.ones((self.N, 1)).astype(xp.float32)),
            F.reshape(F.sum(self.ww), (1, 1)))) * self.p + self.ww  # N * 1
        self.wwrep = F.matmul(
            self.ww,
            Variable(xp.ones((1, self.N)).astype(xp.float32)))  # N * N
        self.L = (1.0 - self.wwrep - F.transpose(self.wwrep)) * self.L \
            + F.matmul(self.ww, F.transpose(self.p))  # N * N
        self.L = self.L * (xp.ones((self.N, self.N))
                           - xp.eye(self.N))  # force L[i,i] == 0

    self.fo = F.matmul(self.L, self.wr)  # N * R
    self.ba = F.matmul(F.transpose(self.L), self.wr)  # N * R
    self.cr = C(self.M.data, self.kr.data, self.betar.data)
    self.bacrfo = F.concat((
        F.reshape(F.transpose(self.ba), (self.R, self.N, 1)),
        F.reshape(F.transpose(self.cr), (self.R, self.N, 1)),
        F.reshape(F.transpose(self.fo), (self.R, self.N, 1)),
    ), 2)  # R * N * 3
    self.pi = F.reshape(self.pi, (self.R, 3, 1))  # R * 3 * 1
    self.wr = F.transpose(
        F.reshape(F.batch_matmul(self.bacrfo, self.pi),
                  (self.R, self.N)))  # N * R
    self.r = F.reshape(F.matmul(F.transpose(self.M), self.wr),
                       (1, self.R * self.W))  # W * R (-> 1 * RW)
    self.y = self.l_Wr(self.r) + self.nu  # 1 * Y
    return self.y
def predict(self, atoms, adjs):
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        x = self.__call__(atoms, adjs)
    return F.sigmoid(x)