from mxnet import ndarray as F  # assumed binding: F is mxnet.ndarray (inputs are NDArrays)


def batch_pix_accuracy(output, target, ignore_bg=False):
    """PixAcc"""
    # inputs are NDArray, output 4D (N, C, H, W) class scores, target 3D (N, H, W) labels
    predict = F.argmax(output, 1)
    target = target.astype(predict.dtype)
    if ignore_bg:
        # count only foreground pixels (label > 0)
        pixel_labeled = (target > 0).sum().asscalar()
        pixel_correct = (F.equal(predict, target) * (target > 0.0)).sum().asscalar()
    else:
        # count every pixel with a valid (non-negative) label
        pixel_labeled = (target >= 0).sum().asscalar()
        pixel_correct = (F.equal(predict, target) * (target >= 0.0)).sum().asscalar()
    return pixel_correct, pixel_labeled
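
# Hedged usage sketch (not part of the original code): exercising batch_pix_accuracy
# on random data. Shapes, the class count, and the epsilon are illustrative assumptions.
def _example_batch_pix_accuracy():
    from mxnet import nd
    output = nd.random.uniform(shape=(2, 4, 8, 8))                       # N x C x H x W class scores
    target = nd.random.randint(0, 4, shape=(2, 8, 8)).astype('float32')  # N x H x W labels in [0, 4)
    correct, labeled = batch_pix_accuracy(output, target, ignore_bg=True)
    return correct / (labeled + 1e-10)                                   # pixel accuracy over foreground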
def batch_pix_accuracy(output, target):
    """PixAcc"""
    # inputs are NDArray, output 4D, target 3D; F refers to mxnet.ndarray as above
    # shift both by +1 so an ignore label of -1 (if present) maps to 0 and is dropped by target > 0
    predict = F.argmax(output, 1) + 1
    target = target.astype(predict.dtype) + 1
    pixel_labeled = (target > 0).sum().asscalar()
    pixel_correct = (F.equal(predict, target) * (target > 0)).sum().asscalar()
    return pixel_correct, pixel_labeled
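
# Hedged worked example (not part of the original code): a 1x2x1x3 score map and a target
# whose last pixel carries the ignore label -1; the +1 shift drops that pixel, giving
# labeled == 2 and correct == 2. All values are illustrative.
def _example_pix_accuracy_ignore():
    from mxnet import nd
    output = nd.array([[[[5.0, 1.0, 3.0]],
                        [[0.0, 4.0, 2.0]]]])   # shape (1, 2, 1, 3): two classes
    target = nd.array([[[0, 1, -1]]])          # shape (1, 1, 3): last pixel is ignored
    return batch_pix_accuracy(output, target)  # -> (2.0, 2.0)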
def hybrid_forward(self, F, fts, ys, ftt, yt):
    """
    Semantic Alignment Loss
    :param F: Function namespace (mx.nd or mx.sym)
    :param fts: features for the source domain [M, K]
    :param ys: label for the source domain [M]
    :param ftt: features for the target domain [N, K]
    :param yt: label for the target domain [N]
    :return: loss, one value per target-domain sample [N]
    """
    if self._fn:
        # Normalize the features
        fts = F.L2Normalization(fts, mode='instance')
        ftt = F.L2Normalization(ftt, mode='instance')

    # Tile source and target features to a common [N, M, K] grid
    fts_rpt = F.broadcast_to(fts.expand_dims(axis=0),
                             shape=(self._bs_tgt, self._bs_src, self._embed_size))
    ftt_rpt = F.broadcast_to(ftt.expand_dims(axis=1),
                             shape=(self._bs_tgt, self._bs_src, self._embed_size))

    # Pairwise squared distances between every target and every source sample [N, M]
    dists = F.sum(F.square(ftt_rpt - fts_rpt), axis=2)

    yt_rpt = F.broadcast_to(yt.expand_dims(axis=1),
                            shape=(self._bs_tgt, self._bs_src)).astype('int32')
    ys_rpt = F.broadcast_to(ys.expand_dims(axis=0),
                            shape=(self._bs_tgt, self._bs_src)).astype('int32')

    y_same = F.equal(yt_rpt, ys_rpt).astype('float32')
    y_diff = F.not_equal(yt_rpt, ys_rpt).astype('float32')

    intra_cls_dists = dists * y_same
    inter_cls_dists = dists * y_diff

    # Replace same-class entries with the row maximum so they never win the min below
    max_dists = F.max(dists, axis=1, keepdims=True)
    max_dists = F.broadcast_to(max_dists, shape=(self._bs_tgt, self._bs_src))
    revised_inter_cls_dists = F.where(y_same, max_dists, inter_cls_dists)

    max_intra_cls_dist = F.max(intra_cls_dists, axis=1)
    min_inter_cls_dist = F.min(revised_inter_cls_dists, axis=1)

    # Hinge: push the nearest different-class source sample at least `margin`
    # beyond the farthest same-class source sample
    loss = F.relu(max_intra_cls_dist - min_inter_cls_dist + self._margin)

    return loss
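
# Hedged, self-contained sketch (not from the original code) that mirrors the alignment
# loss above with plain mxnet.ndarray ops, as a quick check of the broadcasting logic.
# The batch sizes, labels, and margin below are illustrative assumptions.
def _example_semantic_alignment_loss(margin=1.0):
    from mxnet import nd
    M, N, K = 4, 3, 8                      # source batch, target batch, embedding size
    fts = nd.L2Normalization(nd.random.uniform(shape=(M, K)), mode='instance')  # source features
    ftt = nd.L2Normalization(nd.random.uniform(shape=(N, K)), mode='instance')  # target features
    ys = nd.array([0, 1, 2, 1])            # source labels [M]
    yt = nd.array([0, 1, 2])               # target labels [N]
    # Pairwise squared distances between every target and every source sample: [N, M]
    dists = nd.sum(nd.square(ftt.expand_dims(axis=1) - fts.expand_dims(axis=0)), axis=2)
    y_same = nd.equal(yt.expand_dims(axis=1), ys.expand_dims(axis=0))
    # Farthest same-class source vs. nearest different-class source, per target sample
    max_intra = nd.max(dists * y_same, axis=1)
    row_max = nd.broadcast_to(nd.max(dists, axis=1, keepdims=True), shape=(N, M))
    min_inter = nd.min(nd.where(y_same, row_max, dists), axis=1)
    return nd.relu(max_intra - min_inter + margin)   # one loss value per target sample [N]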
def backward(self, out_grads=None):
    assert self.binded and self.params_initialized
    #tmp_ctx = self._ctx_cpu
    tmp_ctx = self._ctx_single_gpu
    fc7_outs = []
    ctx_fc7_max = self.get_ndarray(tmp_ctx, 'ctx_fc7_max', (self._batch_size, len(self._context)))

    # Collect the fc7 logits of every classifier shard and their row-wise maxima.
    arcface_module_outputs = []
    for i, _module in enumerate(self._arcface_modules):
        out = _module.get_outputs(merge_multi_context=True)
        arcface_module_outputs.append(out)
        _fc7 = out[0]
        fc7_outs.append(_fc7)
        _fc7_max = nd.max(_fc7, axis=1).as_in_context(tmp_ctx)
        ctx_fc7_max[:, i] = _fc7_max

    # Global max over all shards, for a numerically stable softmax.
    local_fc7_max = self.get_ndarray(tmp_ctx, 'local_fc7_max', (self._batch_size, 1))
    nd.max(ctx_fc7_max, axis=1, keepdims=True, out=local_fc7_max)
    global_fc7_max = local_fc7_max

    # Subtract the global max, exponentiate, and accumulate the global denominator.
    local_fc7_sum = self.get_ndarray(tmp_ctx, 'local_fc7_sum', (self._batch_size, 1))
    local_fc7_sum[:, :] = 0.0
    for i, _module in enumerate(self._arcface_modules):
        _max = self.get_ndarray2(fc7_outs[i].context, 'fc7_max', global_fc7_max)
        fc7_outs[i] = nd.broadcast_sub(fc7_outs[i], _max)
        fc7_outs[i] = nd.exp(fc7_outs[i])
        _sum = nd.sum(fc7_outs[i], axis=1, keepdims=True).as_in_context(tmp_ctx)
        local_fc7_sum += _sum
    global_fc7_sum = local_fc7_sum

    # Periodically report training accuracy over the locally held classes.
    if self._iter % self._verbose == 0:
        #_ctx = self._context[-1]
        _ctx = self._ctx_cpu
        _probs = []
        for i, _module in enumerate(self._arcface_modules):
            _prob = self.get_ndarray2(_ctx, '_fc7_prob_%d' % i, fc7_outs[i])
            _probs.append(_prob)
        fc7_prob = self.get_ndarray(_ctx, 'test_fc7_prob',
                                    (self._batch_size, self._ctx_num_classes * len(self._context)))
        nd.concat(*_probs, dim=1, out=fc7_prob)
        fc7_pred = nd.argmax(fc7_prob, axis=1)
        local_label = self.global_label - self._local_class_start
        _pred = nd.equal(fc7_pred, local_label)
        print('{fc7_acc}', self._iter, nd.mean(_pred).asnumpy()[0])

    #fc1_grad_ctx = self._ctx_cpu
    fc1_grad_ctx = self._ctx_single_gpu
    local_fc1_grad = self.get_ndarray(fc1_grad_ctx, 'local_fc1_grad', (self._batch_size, self._emb_size))
    local_fc1_grad[:, :] = 0.0
    total_eloss = []
    celoss_verbose = 1000
    if self._iter % celoss_verbose == 0:
        fc7_celoss = self.get_ndarray(tmp_ctx, 'test_fc7_celoss', (self._batch_size,))
        fc7_celoss[:] = 0.0

    # Normalize each shard to probabilities, form the softmax gradient (p - onehot),
    # and back-propagate through every classifier shard.
    for i, _module in enumerate(self._arcface_modules):
        _sum = self.get_ndarray2(fc7_outs[i].context, 'fc7_sum', global_fc7_sum)
        fc7_outs[i] = nd.broadcast_div(fc7_outs[i], _sum)
        _label = self.global_label - self._ctx_class_start[i]
        _label = self.get_ndarray2(fc7_outs[i].context, 'label', _label)
        onehot_label = self.get_ndarray(fc7_outs[i].context, 'label_onehot',
                                        (self._batch_size, self._ctx_num_classes))
        nd.one_hot(_label, depth=self._ctx_num_classes, on_value=1.0, off_value=0.0, out=onehot_label)
        if self._iter % celoss_verbose == 0:
            # Accumulate p_y for the periodic cross-entropy report.
            _ce_loss = fc7_outs[i] * onehot_label
            _ce_loss = nd.sum(_ce_loss, axis=1)
            fc7_celoss += _ce_loss.as_in_context(tmp_ctx)
        fc7_outs[i] -= onehot_label

        # Auxiliary outputs beyond fc7 are treated as extra loss terms and
        # seeded with a gradient of ones.
        out = arcface_module_outputs[i]
        out_grads = [fc7_outs[i]]
        for j in range(1, len(out)):
            eloss = out[j]
            egrad_shape = eloss.shape
            egrad = self.get_ndarray(fc7_outs[i].context, 'egrad%d' % j, egrad_shape)
            #egrad[:][:] = 1.0/egrad_shape[0]
            egrad[:][:] = 1.0
            out_grads.append(egrad)
            if self._iter % self._verbose == 0:
                total_eloss.append(np.mean(eloss.asnumpy()))

        _module.backward(out_grads=out_grads)
        ctx_fc1_grad = self.get_ndarray2(fc1_grad_ctx, 'ctx_fc1_grad_%d' % i, _module.get_input_grads()[0])
        local_fc1_grad += ctx_fc1_grad

    if self._iter % self._verbose == 0 and len(total_eloss) > 0:
        print('{eloss}', self._iter, np.mean(total_eloss))
    if self._iter % celoss_verbose == 0:
        ce_loss = nd.log(fc7_celoss) * -1.0
        ce_loss = nd.mean(ce_loss)
        print('CELOSS,%d,%f' % (self._iter, ce_loss.asscalar()))

    # Propagate the accumulated embedding gradient through the backbone.
    global_fc1_grad = local_fc1_grad
    self._curr_module.backward(out_grads=[global_fc1_grad])
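
# Hedged sketch (not from the original code) of the sharded softmax that the backward
# pass above builds up: each shard subtracts the global max, exponentiates, and is
# normalized by the global sum, so concatenating the shards reproduces an ordinary
# softmax over the full class dimension. Shard sizes below are illustrative.
def _example_sharded_softmax():
    from mxnet import nd
    batch, shard_classes, num_shards = 2, 5, 3
    shards = [nd.random.uniform(shape=(batch, shard_classes)) for _ in range(num_shards)]
    # Global max over all shards (for numerical stability), then global sum of exponentials.
    global_max = nd.max(nd.concat(*[nd.max(s, axis=1, keepdims=True) for s in shards], dim=1),
                        axis=1, keepdims=True)
    exps = [nd.exp(nd.broadcast_sub(s, global_max)) for s in shards]
    global_sum = nd.concat(*[nd.sum(e, axis=1, keepdims=True) for e in exps],
                           dim=1).sum(axis=1, keepdims=True)
    probs = [nd.broadcast_div(e, global_sum) for e in exps]
    # Sanity check: the sharded result matches a plain softmax over the concatenated logits.
    reference = nd.softmax(nd.concat(*shards, dim=1), axis=1)
    assert nd.max(nd.abs(nd.concat(*probs, dim=1) - reference)).asscalar() < 1e-5
    return probs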
from mxnet import nd  # assumed import: nd is mxnet.ndarray


def array_equal(a, b):
    """Return True if the two NDArrays are element-wise equal everywhere."""
    return nd.equal(a, b).asnumpy().all()
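
# Hedged usage sketch (not part of the original code): array_equal on two small
# NDArrays of the same shape; the values are illustrative.
def _example_array_equal():
    a = nd.array([1, 2, 3])
    assert array_equal(a, nd.array([1, 2, 3]))
    assert not array_equal(a, nd.array([1, 2, 4]))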
def backward(self, out_grads=None):
    assert self.binded and self.params_initialized
    #tmp_ctx = self._ctx_cpu
    tmp_ctx = self._ctx_single_gpu
    fc7_outs = []
    ctx_fc7_max = self.get_ndarray(tmp_ctx, 'ctx_fc7_max', (self._batch_size, len(self._context)))

    # Collect the fc7 logits of every classifier shard and their row-wise maxima.
    for i, _module in enumerate(self._arcface_modules):
        _fc7 = _module.get_outputs(merge_multi_context=True)[0]
        fc7_outs.append(_fc7)
        _fc7_max = nd.max(_fc7, axis=1).as_in_context(tmp_ctx)
        ctx_fc7_max[:, i] = _fc7_max

    # Global max over all shards, for a numerically stable softmax.
    local_fc7_max = self.get_ndarray(tmp_ctx, 'local_fc7_max', (self._batch_size, 1))
    nd.max(ctx_fc7_max, axis=1, keepdims=True, out=local_fc7_max)
    global_fc7_max = local_fc7_max

    # Subtract the global max, exponentiate, and accumulate the global denominator.
    local_fc7_sum = self.get_ndarray(tmp_ctx, 'local_fc7_sum', (self._batch_size, 1))
    local_fc7_sum[:, :] = 0.0
    for i, _module in enumerate(self._arcface_modules):
        _max = self.get_ndarray2(fc7_outs[i].context, 'fc7_max', global_fc7_max)
        fc7_outs[i] = nd.broadcast_sub(fc7_outs[i], _max)
        fc7_outs[i] = nd.exp(fc7_outs[i])
        _sum = nd.sum(fc7_outs[i], axis=1, keepdims=True).as_in_context(tmp_ctx)
        local_fc7_sum += _sum
    global_fc7_sum = local_fc7_sum

    # Periodically report training accuracy over the locally held classes.
    if self._iter % self._verbose == 0:
        #_ctx = self._context[-1]
        _ctx = self._ctx_cpu
        _probs = []
        for i, _module in enumerate(self._arcface_modules):
            _prob = self.get_ndarray2(_ctx, '_fc7_prob_%d' % i, fc7_outs[i])
            _probs.append(_prob)
        fc7_prob = self.get_ndarray(_ctx, 'test_fc7_prob',
                                    (self._batch_size, self._ctx_num_classes * len(self._context)))
        nd.concat(*_probs, dim=1, out=fc7_prob)
        fc7_pred = nd.argmax(fc7_prob, axis=1)
        local_label = self.global_label - self._local_class_start
        _pred = nd.equal(fc7_pred, local_label)
        print('{fc7_acc}', self._iter, nd.mean(_pred).asnumpy()[0])

    #fc1_grad_ctx = self._ctx_cpu
    fc1_grad_ctx = self._ctx_single_gpu
    local_fc1_grad = self.get_ndarray(fc1_grad_ctx, 'local_fc1_grad', (self._batch_size, self._emb_size))
    local_fc1_grad[:, :] = 0.0
    loss = nd.zeros(shape=(self._batch_size,), ctx=self._ctx_cpu)

    # Normalize each shard to probabilities, accumulate the cross-entropy loss,
    # and back-propagate the softmax gradient (p - onehot) through every shard.
    for i, _module in enumerate(self._arcface_modules):
        _sum = self.get_ndarray2(fc7_outs[i].context, 'fc7_sum', global_fc7_sum)
        fc7_outs[i] = nd.broadcast_div(fc7_outs[i], _sum)
        _label = self.global_label - self._ctx_class_start[i]
        _label = self.get_ndarray2(fc7_outs[i].context, 'label', _label)
        onehot_label = self.get_ndarray(fc7_outs[i].context, 'label_onehot',
                                        (self._batch_size, self._ctx_num_classes))
        nd.one_hot(_label, depth=self._ctx_num_classes, on_value=1.0, off_value=0.0, out=onehot_label)
        # for debug: accumulate -log p_y on the CPU
        loss -= (mx.nd.sum(mx.nd.log(fc7_outs[i]) * onehot_label, axis=1)).as_in_context(self._ctx_cpu)
        fc7_outs[i] -= onehot_label
        _module.backward(out_grads=[fc7_outs[i]])
        print('for debug, fc7 outs max is ', i, mx.nd.max(fc7_outs[i]))
        print('for debug, fc7 outs min is ', i, mx.nd.min(fc7_outs[i]))
        ctx_fc1_grad = self.get_ndarray2(fc1_grad_ctx, 'ctx_fc1_grad_%d' % i, _module.get_input_grads()[0])
        local_fc1_grad += ctx_fc1_grad
        print('for debug, ctx fc1_grad max is ', i, mx.nd.max(ctx_fc1_grad))
        print('for debug, ctx fc1 grad shape, ', ctx_fc1_grad.shape)

    global_fc1_grad = local_fc1_grad
    # global_fc1_grad = mx.nd.clip(local_fc1_grad, a_min=-15, a_max=15)
    print('for debug, global fc1_grad max is ', mx.nd.max(global_fc1_grad))
    self._curr_module.backward(out_grads=[global_fc1_grad])
    # for debug
    return mx.nd.sum(loss)
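
# Hedged sketch (not from the original code): the manual gradient used above,
# softmax_probability - one_hot_label, matches what autograd computes for softmax
# cross-entropy on a tiny example. Shapes and labels below are illustrative.
def _example_softmax_ce_grad():
    from mxnet import nd, autograd
    logits = nd.random.uniform(shape=(2, 4))
    labels = nd.array([1, 3])
    onehot = nd.one_hot(labels, depth=4)
    logits.attach_grad()
    with autograd.record():
        probs = nd.softmax(logits, axis=1)
        loss = -nd.sum(nd.log(probs) * onehot, axis=1)
    loss.backward()
    manual_grad = nd.softmax(logits, axis=1) - onehot
    assert nd.max(nd.abs(logits.grad - manual_grad)).asscalar() < 1e-5
    return manual_grad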
def backward(self, out_grads=None):
    assert self.binded and self.params_initialized

    ## ============= forward classifier layer ===========
    fc7_outs = []
    for i, _module in enumerate(self._arcface_modules):
        _fc7 = _module.get_outputs(merge_multi_context=True)[0]
        fc7_outs.append(_fc7)

    # Numerically stable softmax across the class shards: global max, shifted
    # exponentials, global denominator, then per-shard probabilities.
    ctx_max = map(
        lambda fc7_out: nd.max(fc7_out, axis=1, keepdims=True).as_in_context(self._ctx_single_gpu),
        fc7_outs)
    local_fc7_max = nd.max(nd.concat(*ctx_max, dim=1), axis=1, keepdims=True)
    fc7_exps = list(
        map(lambda fc7_out: nd.exp(fc7_out - local_fc7_max.as_in_context(fc7_out.context)),
            fc7_outs))
    ctx_sum = map(
        lambda fc7_exp: nd.sum(fc7_exp, axis=1, keepdims=True).as_in_context(self._ctx_single_gpu),
        fc7_exps)
    exp_sum = nd.sum(nd.concat(*ctx_sum, dim=1), axis=1, keepdims=True)
    softmax_outs = list(
        map(lambda fc7_exp: nd.broadcast_div(fc7_exp, exp_sum.as_in_context(fc7_exp.context)),
            fc7_exps))

    # One-hot labels per device, shifted into each device's local class range.
    onehot_device_labels = [
        nd.one_hot(self.global_label.as_in_context(device) - self._ctx_class_start[i],
                   depth=self._ctx_num_classes, on_value=1.0, off_value=0.0)
        for i, device in enumerate(self._context)
    ]

    ## ============= verbose train accuracy and loss ===========
    if self._iter % self._verbose == 0:
        local_label = self.global_label - self._local_class_start
        fc7_pred = self.parall_argmax(softmax_outs, self._ctx_single_gpu)
        _pred = nd.equal(fc7_pred, local_label).asnumpy()  # per-sample correctness over the whole batch
        loss = self.parall_loss(softmax_outs, onehot_device_labels, self._ctx_single_gpu).asscalar()
        assert not math.isnan(loss)
        self.logger.info('[Iter {}] train acc : {}, total loss : {}'.format(
            self._iter, np.mean(_pred), loss))

    ## ============= backward large weight classifier layer with gradient ===========
    local_fc1_grad = self.get_ndarray_by_shape(
        self._ctx_single_gpu, 'local_fc1_grad', (self._batch_size, self._emb_size))
    local_fc1_grad[:, :] = 0.0
    for i, _module in enumerate(self._arcface_modules):
        # Softmax cross-entropy gradient for this shard: p - onehot.
        _module.backward(out_grads=[softmax_outs[i] - onehot_device_labels[i]])
        ctx_fc1_grad = self.get_ndarray_by_v_arr(
            self._ctx_single_gpu, 'ctx_fc1_grad_%d' % i, _module.get_input_grads()[0])
        local_fc1_grad += ctx_fc1_grad

    ## ============= backward backbone ===============
    global_fc1_grad = local_fc1_grad
    self._backbone_module.backward(out_grads=[global_fc1_grad])
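
# Hedged sketch (not from the original code): parall_argmax / parall_loss are helpers of
# the surrounding class that are not shown here, so this only illustrates one plausible
# way a parallel argmax could combine per-shard probabilities into global class
# predictions. It assumes the shards have already been gathered onto a single context.
def _example_parall_argmax(shard_probs, shard_class_starts):
    from mxnet import nd
    # Best score and best local class index within each shard, per sample.
    best_scores = nd.concat(*[nd.max(p, axis=1, keepdims=True) for p in shard_probs], dim=1)
    best_locals = nd.concat(*[nd.argmax(p, axis=1).expand_dims(axis=1) for p in shard_probs], dim=1)
    winner = nd.argmax(best_scores, axis=1)            # which shard holds the overall best class
    local_idx = nd.pick(best_locals, winner, axis=1)   # that shard's local argmax
    starts = nd.array(shard_class_starts)              # global offset of each shard's first class
    return nd.take(starts, winner) + local_idx         # global class prediction per sample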