def first_forward(self, x, num_lm):
    """Run the initial context pass and return the first location and baseline.

    ``rnn_1`` is first called once on an all-zero float32 input of shape
    ``(num_lm, self.n_unit)``; its output is discarded. ``x`` then goes
    through three identical stages (2x2 max pooling with stride 2, a context
    convolution, ReLU) before being fed to ``rnn_2``.

    Args:
        x: Input handed to the first pooling stage.
        num_lm: Number of rows of the zero input given to ``rnn_1``.

    Returns:
        Tuple ``(l, b)``: the sigmoid of ``attention_loc`` and the sigmoid of
        ``baseline``, both evaluated on the ReLU-activated ``rnn_2`` output.
    """
    zero_input = xp.zeros((num_lm, self.n_unit)).astype(xp.float32)
    self.rnn_1(Variable(zero_input))

    features = x
    context_convs = (
        self.context_cnn_1,
        self.context_cnn_2,
        self.context_cnn_3,
    )
    for conv in context_convs:
        pooled = F.max_pooling_2d(features, 2, stride=2)
        features = F.relu(conv(pooled))

    state = F.relu(self.rnn_2(features))
    l = F.sigmoid(self.attention_loc(state))
    # The baseline head receives a fresh Variable built from the raw data of
    # `state` (presumably to keep baseline gradients out of the context
    # network -- confirm against the training setup).
    b = F.sigmoid(self.baseline(Variable(state.data)))
    return l, b
def first_forward(self, x, num_lm):
    """Run the initial context pass and return the first location and baseline.

    ``rnn_1`` is first called once on an all-zero float32 input of shape
    ``(num_lm, self.n_unit)``; its output is discarded. ``x`` is average
    pooled (4x4, stride 4) and passed through five context convolutions, each
    followed by its ``l_norm_cc*`` layer and ReLU. A 2x2/stride-2 max pooling
    is applied before the third convolution and before ``context_full``.

    Args:
        x: Input handed to the average pooling stage.
        num_lm: Number of rows of the zero input given to ``rnn_1``.

    Returns:
        Tuple ``(l, b)``: the sigmoid of ``attention_loc`` and the sigmoid of
        ``baseline``, both evaluated on the ReLU-activated ``rnn_2`` output.
    """
    zero_input = xp.zeros((num_lm, self.n_unit)).astype(xp.float32)
    self.rnn_1(Variable(zero_input))

    downsampled = F.average_pooling_2d(x, 4, stride=4)
    conv1 = F.relu(self.l_norm_cc1(self.context_cnn_1(downsampled)))
    conv2 = F.relu(self.l_norm_cc2(self.context_cnn_2(conv1)))

    pooled2 = F.max_pooling_2d(conv2, 2, stride=2)
    conv3 = F.relu(self.l_norm_cc3(self.context_cnn_3(pooled2)))
    conv4 = F.relu(self.l_norm_cc4(self.context_cnn_4(conv3)))
    conv5 = F.relu(self.l_norm_cc5(self.context_cnn_5(conv4)))

    pooled5 = F.max_pooling_2d(conv5, 2, stride=2)
    full = F.relu(self.context_full(pooled5))
    state = F.relu(self.rnn_2(full))

    l = F.sigmoid(self.attention_loc(state))
    # The baseline head receives a fresh Variable built from the raw data of
    # `state` (presumably to keep baseline gradients out of the context
    # network -- confirm against the training setup).
    b = F.sigmoid(self.baseline(Variable(state.data)))
    return l, b
def __call__(self, x, target, num_lm, batch_size=1, train=1, debug=0,
             n_step=1):
    """Run ``n_step`` glimpse steps in the mode selected by ``train``.

    Every mode calls ``self.reset()`` and ``self.first_forward`` once, then
    repeats ``make_img`` followed by ``recurrent_forward`` for each step and
    returns on the final step.

    Args:
        x: Input passed to ``first_forward`` and ``make_img``.
        target: Labels; ``target.data`` is multiplied element-wise with
            ``y.data`` in the evaluation modes.
        num_lm: Number of samples; used as a divisor and for buffer shapes.
        batch_size: Not used by this method.
        train: ``1`` returns the loss, ``0`` returns accuracy statistics,
            ``2`` returns accuracy statistics plus location/scale histories.
        debug: Not used by this method.
        n_step: Number of glimpse steps.

    Returns:
        * ``train == 1``: ``cul_loss(...) / num_lm`` from the final step.
        * ``train == 0``: ``(sum_accuracy / (num_lm * n_step),
          ydata / n_step, last_step_accuracy / num_lm)``.
        * ``train == 2``: the same three values followed by ``l_list`` and
          ``s_list``.
        * ``False`` when ``train`` is none of the above or the step loop
          never runs.
    """
    if train == 1:
        self.reset()
        loc, baseline = self.first_forward(x, num_lm)
        for step in range(n_step):
            is_last = step + 1 == n_step
            # Only the final glimpse is requested with random=1.
            glimpse, used_loc = self.make_img(
                x, loc, num_lm, random=1 if is_last else 0)
            next_loc, y, next_baseline = self.recurrent_forward(
                glimpse, used_loc)
            if is_last:
                loss = self.cul_loss(
                    y, target, loc, used_loc, num_lm, baseline)
                return loss / num_lm
            loc = next_loc
            baseline = next_baseline

    elif train == 0:
        sum_accuracy = 0
        ydata = xp.zeros((num_lm, self.num_class))
        self.reset()
        loc, _ = self.first_forward(x, num_lm)
        for step in range(n_step):
            glimpse, used_loc = self.make_img(x, loc, num_lm, random=0)
            next_loc, y, _ = self.recurrent_forward(glimpse, used_loc)
            accuracy = y.data * target.data
            sum_accuracy += xp.sum(accuracy)
            ydata += y.data
            if step + 1 == n_step:
                return (sum_accuracy / (num_lm * n_step),
                        ydata / n_step,
                        xp.sum(accuracy) / num_lm)
            loc = next_loc

    elif train == 2:
        sum_accuracy = 0
        ydata = xp.zeros((num_lm, self.num_class))
        self.reset()
        l_list = xp.zeros((n_step, num_lm, 2))
        s_list = xp.zeros((n_step, num_lm, 1))
        loc, _ = self.first_forward(x, num_lm)
        # Slot 0 holds the location from first_forward; the scale is the
        # fixed ratio gsize / img_size at every step.
        l_list[0] = loc.data
        s_list[0] = self.gsize / self.img_size
        for step in range(n_step):
            glimpse, used_loc = self.make_img(x, loc, num_lm, random=0)
            next_loc, y, _ = self.recurrent_forward(glimpse, used_loc)
            accuracy = y.data * target.data
            sum_accuracy += xp.sum(accuracy)
            ydata += y.data
            if step + 1 == n_step:
                return (sum_accuracy / (num_lm * n_step),
                        ydata / n_step,
                        xp.sum(accuracy) / num_lm,
                        l_list,
                        s_list)
            loc = next_loc
            l_list[step + 1] = loc.data
            s_list[step + 1] = self.gsize / self.img_size

    return False
def __call__(self, x, target, num_lm, batch_size=1, train=1, debug=0,
             n_step=1):
    """Run ``n_step`` glimpse steps in the mode selected by ``train``.

    Every mode calls ``self.reset()`` and ``self.first_forward`` once, then
    repeats ``make_img`` followed by ``recurrent_forward`` for each step and
    returns on the final step.

    Args:
        x: Input passed to ``first_forward`` and ``make_img``.
        target: Labels; ``target.data`` is compared with / multiplied by
            ``y.data`` (presumably one-hot rows -- confirm with the caller).
        num_lm: Number of samples; used as a divisor and for buffer shapes.
        batch_size: Not used by this method.
        train: ``1`` returns the loss, ``0`` returns last-step accuracy
            statistics, ``2`` returns accumulated statistics plus the
            location and scale histories.
        debug: Not used by this method.
        n_step: Number of glimpse steps.

    Returns:
        * ``train == 1``: the final-step loss divided by ``num_lm``.
        * ``train == 0``: ``(acc, y.data / n_step, acc)`` where ``acc`` is
          ``sum(y.data * target.data) / num_lm`` of the final step.
        * ``train == 2``: ``(sum_accuracy / (num_lm * n_step),
          ydata / n_step, sum(accuracy, axis=1) / num_lm, l_list, z)`` with
          ``z = 10 ** (s_list - 1)``.
        * ``False`` when ``train`` is none of the above or the step loop
          never runs.
    """
    if train == 1:
        self.reset()
        r_buf = 0
        l, s, b = self.first_forward(x, num_lm)
        for i in range(n_step):
            xm, lm, sm = self.make_img(x, l, s, num_lm, random=1)
            l1, s1, y, b1 = self.recurrent_forward(xm, lm, sm)
            loss, size_p = self.cul_loss(y, target, l, s, lm, sm)
            r_buf += size_p
            if i + 1 == n_step:
                # r is 1 where the arg-max of y matches the arg-max of the
                # target and 0 elsewhere, shaped (num_lm, 1).
                hits = (xp.argmax(y.data, axis=1)
                        == xp.argmax(target.data, axis=1))
                r = xp.where(hits, 1, 0).reshape(
                    (num_lm, 1)).astype(xp.float32)
                # Squared error between r and the baseline from the
                # previous step (b, not b1).
                loss += F.sum((r - b) * (r - b))
                # Uses b.data, i.e. the raw array rather than the Variable.
                k = self.r * (r - b.data)
                loss += F.sum(k * r_buf)
                return loss / num_lm
            l = l1
            s = s1
            b = b1

    elif train == 0:
        self.reset()
        l, s, b1 = self.first_forward(x, num_lm)
        for i in range(n_step):
            xm, lm, sm = self.make_img(x, l, s, num_lm, random=0)
            l1, s1, y, b = self.recurrent_forward(xm, lm, sm)
            if i + 1 == n_step:
                # Only the final step contributes to the returned values.
                mean_accuracy = xp.sum(y.data * target.data) / num_lm
                return mean_accuracy, y.data / n_step, mean_accuracy
            l = l1
            s = s1

    elif train == 2:
        sum_accuracy = 0
        ydata = xp.zeros((num_lm, self.num_class))
        self.reset()
        l_list = xp.zeros((n_step, num_lm, 2))
        s_list = xp.zeros((n_step, num_lm, 1))
        l, s, b1 = self.first_forward(x, num_lm)
        # Slot 0 holds the location/scale produced by first_forward.
        l_list[0] = l.data
        s_list[0] = s.data
        for i in range(n_step):
            xm, lm, sm = self.make_img(x, l, s, num_lm, random=0)
            l1, s1, y, b = self.recurrent_forward(xm, lm, sm)
            accuracy = y.data * target.data
            sum_accuracy += xp.sum(accuracy)
            ydata += y.data
            if i + 1 == n_step:
                # s_list is an xp array, so the power is taken with xp as
                # well to stay on the same array backend as the rest of
                # this method.
                z = xp.power(10, s_list - 1)
                return (sum_accuracy / (num_lm * n_step),
                        ydata / n_step,
                        xp.sum(accuracy, axis=1) / num_lm,
                        l_list,
                        z)
            l = l1
            s = s1
            l_list[i + 1] = l.data
            s_list[i + 1] = s.data

    return False