def compute_retrospective_loss(self, observed_arr, encoded_arr,
                                   decoded_arr, re_encoded_arr):
        '''
        Compute retrospective loss.

        Returns:
            `mx.nd.NDArray` of losses, one value per sample in the batch
            (the delta averaged over all non-batch axes).
        '''
        if self.__output_neuron_count == self.__hidden_neuron_count:
            target_arr = nd.broadcast_sub(
                encoded_arr, nd.expand_dims(observed_arr.mean(axis=2), axis=2))
            summary_delta_arr = nd.sqrt(nd.power(decoded_arr - target_arr, 2))
        else:
            # The observed and hidden dimensionalities differ, so draw a uniform
            # random subset of dimensions to make the shapes comparable.
            if self.__output_neuron_count > self.__hidden_neuron_count:
                all_dim_arr = np.arange(self.__output_neuron_count)
                np.random.shuffle(all_dim_arr)
                choiced_dim_arr = all_dim_arr[:self.__hidden_neuron_count]
                target_arr = nd.broadcast_sub(
                    encoded_arr,
                    nd.expand_dims(observed_arr[:, :,
                                                choiced_dim_arr].mean(axis=2),
                                   axis=2))
                summary_delta_arr = nd.sqrt(
                    nd.power(decoded_arr[:, :, choiced_dim_arr] - target_arr,
                             2))
            else:
                all_dim_arr = np.arange(self.__hidden_neuron_count)
                np.random.shuffle(all_dim_arr)
                choiced_dim_arr = all_dim_arr[:self.__output_neuron_count]
                target_arr = nd.broadcast_sub(
                    encoded_arr[:, :, choiced_dim_arr],
                    nd.expand_dims(observed_arr.mean(axis=2), axis=2))
                summary_delta_arr = nd.sqrt(
                    nd.power(decoded_arr - target_arr, 2))

        match_delta_arr = None
        for i in range(self.__batch_size):
            arr = nd.sqrt(
                nd.power(encoded_arr[i, -1] - re_encoded_arr[i, -1], 2))
            if match_delta_arr is None:
                match_delta_arr = nd.expand_dims(arr, axis=0)
            else:
                match_delta_arr = nd.concat(match_delta_arr,
                                            nd.expand_dims(arr, axis=0),
                                            dim=0)

        delta_arr = summary_delta_arr + nd.expand_dims(
            self.__retrospective_lambda * match_delta_arr, axis=1)
        v = nd.norm(delta_arr)
        if v > self.__grad_clip_threshold:
            delta_arr = delta_arr * self.__grad_clip_threshold / v

        loss = nd.mean(delta_arr, axis=0, exclude=True)

        return loss
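The loss above is the reconstruction delta plus the lambda-weighted re-encoding match delta, clipped by its global L2 norm and then averaged over every axis except the batch axis. A minimal standalone sketch of that clipping-and-reduction step, with a hypothetical threshold and a stand-in delta array:

from mxnet import nd

grad_clip_threshold = 1.0                        # hypothetical threshold
delta_arr = nd.random.normal(shape=(4, 10, 8))   # stands in for summary + match deltas

v = nd.norm(delta_arr)                           # L2 norm over all elements
if v.asscalar() > grad_clip_threshold:
    delta_arr = delta_arr * grad_clip_threshold / v

per_sample_loss = nd.mean(delta_arr, axis=0, exclude=True)  # one value per batch sample
print(per_sample_loss.shape)                     # (4,)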
Example #2
    def backward_sample(self, total_feature, label):
        this_rank_classes = int(self.memory_bank.num_sample)
        local_index, unique_sorted_global_label = self.memory_bank.sample(
            label)

        # Get local index
        _mapping_dict = {}
        local_sampled_class = local_index + self.rank * self.memory_bank.num_local
        global_label_set = set(unique_sorted_global_label)
        for idx, absolute_label in enumerate(local_sampled_class):
            if absolute_label in global_label_set:
                _mapping_dict[
                    absolute_label] = idx + self.rank * self.memory_bank.num_sample

        label_list = list(label.asnumpy())
        mapping_label = []
        for i in range(len(label_list)):
            absolute_label = label_list[i]
            if absolute_label in _mapping_dict:
                mapping_label.append(_mapping_dict[absolute_label])
            else:
                mapping_label.append(-1)

        mapping_label = nd.array(mapping_label, dtype=np.int32)

        # Get weight
        local_index = nd.array(local_index)
        local_index = self.get_ndarray2(self.gpu, "local_index", local_index)
        sample_weight, sample_weight_mom = self.memory_bank.get(local_index)

        # Sync to gpu (the copies are identical whether or not the memory bank
        # itself lives on the GPU)
        _data = self.get_ndarray2(self.gpu, "data_%d" % self.rank,
                                  total_feature)
        _weight = self.get_ndarray2(self.gpu, 'weight_%d' % self.rank,
                                    sample_weight)
        _weight_mom = self.get_ndarray2(self.gpu,
                                        'weight_mom_%d' % self.rank,
                                        sample_weight_mom)

        # Attach grad
        _data.attach_grad()
        _weight.attach_grad()

        # Convert label
        _label = self.get_ndarray2(self.gpu, 'mapping_label_%d' % self.rank,
                                   mapping_label)
        _label = _label - int(self.rank * self.memory_bank.num_sample)
        _fc7, _one_hot = self.fc7_model.forward(_data,
                                                _weight,
                                                mapping_label=_label,
                                                depth=this_rank_classes)

        # Sync max
        max_fc7 = nd.max(_fc7, axis=1, keepdims=True)
        max_fc7 = nd.reshape(max_fc7, -1)

        total_max_fc7 = self.get_ndarray(context=self.gpu,
                                         name='total_max_fc7',
                                         shape=(max_fc7.shape[0], self.size),
                                         dtype='float32')
        total_max_fc7[:] = 0
        total_max_fc7[:, self.rank] = max_fc7
        hvd.allreduce_(total_max_fc7, average=False)

        global_max_fc7 = self.get_ndarray(context=self.gpu,
                                          name='global_max_fc7',
                                          shape=(max_fc7.shape[0], 1),
                                          dtype='float32')
        nd.max(total_max_fc7, axis=1, keepdims=True, out=global_max_fc7)

        # Calculate exp(logits)
        _fc7_grad = nd.broadcast_sub(_fc7, global_max_fc7)
        _fc7_grad = nd.exp(_fc7_grad)

        # Calculate sum
        sum_fc7 = nd.sum(_fc7_grad, axis=1, keepdims=True)
        global_sum_fc7 = hvd.allreduce(sum_fc7, average=False)

        # Calculate grad
        _fc7_grad = nd.broadcast_div(_fc7_grad, global_sum_fc7)

        # Calculate loss
        tmp = _fc7_grad * _one_hot
        tmp = nd.sum(tmp, axis=1, keepdims=True)
        tmp = self.get_ndarray2(self.gpu, 'ctx_loss', tmp)
        tmp = hvd.allreduce(tmp, average=False)
        global_loss = -nd.mean(nd.log(tmp + 1e-30))

        _fc7_grad = _fc7_grad - _one_hot

        # Backward
        _fc7.backward(out_grad=_fc7_grad)

        # Update center
        _weight_grad = _weight.grad
        self.memory_optimizer.update(weight=_weight,
                                     grad=_weight_grad,
                                     state=_weight_mom,
                                     learning_rate=self.memory_lr)
        if self.memory_bank.gpu:
            self.memory_bank.set(index=local_index,
                                 updated_weight=_weight,
                                 updated_weight_mom=_weight_mom)
        else:
            self.memory_bank.set(index=local_index,
                                 updated_weight=self.get_ndarray2(
                                     mx.cpu(), "cpu_weight_%d" % self.rank,
                                     _weight),
                                 updated_weight_mom=self.get_ndarray2(
                                     mx.cpu(), "cpu_weight_mom_%d" % self.rank,
                                     _weight_mom))
        return _data.grad, global_loss
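The max/sum allreduce pattern above is a numerically stable softmax over a class dimension that is sharded across ranks. A minimal NumPy sketch (not part of the class above) showing that the sharded computation matches an ordinary softmax over the full class dimension:

import numpy as np

np.random.seed(0)
logits = np.random.randn(4, 12)                # batch of 4, 12 classes in total
shards = np.split(logits, 3, axis=1)           # 3 "ranks", 4 classes each

# allreduce(max): per-shard max, then global max
global_max = np.max([s.max(axis=1, keepdims=True) for s in shards], axis=0)
exp_shards = [np.exp(s - global_max) for s in shards]           # safe exponentials
# allreduce(sum): per-shard sum, then global partition function
global_sum = np.sum([e.sum(axis=1, keepdims=True) for e in exp_shards], axis=0)
probs = np.concatenate([e / global_sum for e in exp_shards], axis=1)

ref = np.exp(logits - logits.max(axis=1, keepdims=True))
ref /= ref.sum(axis=1, keepdims=True)
assert np.allclose(probs, ref)                 # matches the unsharded softmax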
Example #3
    def backward(self, total_feature, label):
        memory_bank = self.memory_bank
        assert memory_bank.num_local == memory_bank.num_sample, \
            "backward() assumes no class sub-sampling (num_local == num_sample)"

        _data = self.get_ndarray2(self.gpu, "data_%d" % self.rank,
                                  total_feature)
        # Attach grad
        _data.attach_grad()
        memory_bank.weight.attach_grad()

        # Convert label
        _label = self.get_ndarray2(self.gpu, 'label_%d' % self.rank, label)
        _label = _label - int(self.rank * memory_bank.num_local)
        _fc7, _one_hot = self.fc7_model.forward(_data,
                                                memory_bank.weight,
                                                mapping_label=_label,
                                                depth=memory_bank.num_local)

        # Sync max
        max_fc7 = nd.max(_fc7, axis=1, keepdims=True)
        max_fc7 = nd.reshape(max_fc7, -1)

        total_max_fc7 = self.get_ndarray(context=self.gpu,
                                         name='total_max_fc7',
                                         shape=(max_fc7.shape[0], self.size),
                                         dtype='float32')
        total_max_fc7[:] = 0
        total_max_fc7[:, self.rank] = max_fc7
        hvd.allreduce_(total_max_fc7, average=False)

        global_max_fc7 = self.get_ndarray(context=self.gpu,
                                          name='global_max_fc7',
                                          shape=(max_fc7.shape[0], 1),
                                          dtype='float32')
        nd.max(total_max_fc7, axis=1, keepdims=True, out=global_max_fc7)

        # Calculate exp(logits)
        _fc7_grad = nd.broadcast_sub(_fc7, global_max_fc7)
        _fc7_grad = nd.exp(_fc7_grad)

        # Calculate sum
        sum_fc7 = nd.sum(_fc7_grad, axis=1, keepdims=True)
        global_sum_fc7 = hvd.allreduce(sum_fc7, average=False)

        # Calculate prob
        _fc7_grad = nd.broadcast_div(_fc7_grad, global_sum_fc7)

        # Calculate loss
        tmp = _fc7_grad * _one_hot
        tmp = nd.sum(tmp, axis=1, keepdims=True)
        tmp = self.get_ndarray2(self.gpu, 'ctx_loss', tmp)
        tmp = hvd.allreduce(tmp, average=False)
        global_loss = -nd.mean(nd.log(tmp + 1e-30))

        # Calculate fc7 grad
        _fc7_grad = _fc7_grad - _one_hot

        # Backward
        _fc7.backward(out_grad=_fc7_grad)

        # Update center
        _weight_grad = memory_bank.weight.grad
        self.memory_optimizer.update(weight=memory_bank.weight,
                                     grad=_weight_grad,
                                     state=memory_bank.weight_mom,
                                     learning_rate=self.memory_lr)

        return _data.grad, global_loss
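Both variants push `_fc7_grad - _one_hot` into `_fc7.backward`, which is the standard derivative of softmax cross-entropy with respect to the logits. A small sketch (an assumption, not library code) checking that identity by finite differences:

import numpy as np

def softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

logits = np.array([2.0, 0.5, -1.0])
one_hot = np.array([0.0, 1.0, 0.0])

def ce_loss(x):
    return -np.log(softmax(x) @ one_hot)    # cross-entropy of the true class

analytic = softmax(logits) - one_hot        # the gradient used above

eps = 1e-6
numeric = np.zeros_like(logits)
for k in range(logits.size):
    d = np.zeros_like(logits)
    d[k] = eps
    numeric[k] = (ce_loss(logits + d) - ce_loss(logits - d)) / (2 * eps)

assert np.allclose(analytic, numeric, atol=1e-5)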
Example #4
    def backward(self, out_grads=None):
        #print('in backward')
        assert self.binded and self.params_initialized
        #tmp_ctx = self._ctx_cpu
        tmp_ctx = self._ctx_single_gpu
        fc7_outs = []
        ctx_fc7_max = self.get_ndarray(tmp_ctx, 'ctx_fc7_max', (self._batch_size, len(self._context)))
        #local_fc7_max = nd.zeros( (self.global_label.shape[0],1), ctx=mx.cpu())
        arcface_module_outputs = []
        for i, _module in enumerate(self._arcface_modules):
          #_fc7 = _module.get_outputs(merge_multi_context=True)[0]
          out = _module.get_outputs(merge_multi_context=True)
          #print(out[0].shape)
          #print(out[1].shape)
          arcface_module_outputs.append(out)
          _fc7 = out[0]
          fc7_outs.append(_fc7)
          _fc7_max = nd.max(_fc7, axis=1).as_in_context(tmp_ctx)
          ctx_fc7_max[:,i] = _fc7_max

        local_fc7_max = self.get_ndarray(tmp_ctx, 'local_fc7_max', (self._batch_size, 1))
        nd.max(ctx_fc7_max, axis=1, keepdims=True, out=local_fc7_max)
        global_fc7_max = local_fc7_max
        #local_fc7_sum = None
        local_fc7_sum = self.get_ndarray(tmp_ctx, 'local_fc7_sum', (self._batch_size,1))
        local_fc7_sum[:,:] = 0.0
        for i, _module in enumerate(self._arcface_modules):
          _max = self.get_ndarray2(fc7_outs[i].context, 'fc7_max', global_fc7_max)
          fc7_outs[i] = nd.broadcast_sub(fc7_outs[i], _max)
          fc7_outs[i] = nd.exp(fc7_outs[i])
          _sum = nd.sum(fc7_outs[i], axis=1, keepdims=True).as_in_context(tmp_ctx)
          local_fc7_sum += _sum
        global_fc7_sum = local_fc7_sum

        if self._iter%self._verbose==0:
          #_ctx = self._context[-1]
          _ctx = self._ctx_cpu
          _probs = []
          for i, _module in enumerate(self._arcface_modules):
            _prob = self.get_ndarray2(_ctx, '_fc7_prob_%d'%i, fc7_outs[i])
            _probs.append(_prob)
          fc7_prob = self.get_ndarray(_ctx, 'test_fc7_prob', (self._batch_size, self._ctx_num_classes*len(self._context)))
          nd.concat(*_probs, dim=1, out=fc7_prob)
          fc7_pred = nd.argmax(fc7_prob, axis=1)
          local_label = self.global_label - self._local_class_start
          #local_label = self.get_ndarray2(_ctx, 'test_label', local_label)
          _pred = nd.equal(fc7_pred, local_label)
          print('{fc7_acc}', self._iter, nd.mean(_pred).asnumpy()[0])


        #local_fc1_grad = []
        #fc1_grad_ctx = self._ctx_cpu
        fc1_grad_ctx = self._ctx_single_gpu
        local_fc1_grad = self.get_ndarray(fc1_grad_ctx, 'local_fc1_grad', (self._batch_size,self._emb_size))
        local_fc1_grad[:,:] = 0.0
        total_eloss = []
        celoss_verbose = 1000
        if self._iter%celoss_verbose==0:
          fc7_celoss = self.get_ndarray(tmp_ctx, 'test_fc7_celoss', (self._batch_size,))
          fc7_celoss[:] = 0.0

        for i, _module in enumerate(self._arcface_modules):
          _sum = self.get_ndarray2(fc7_outs[i].context, 'fc7_sum', global_fc7_sum)
          fc7_outs[i] = nd.broadcast_div(fc7_outs[i], _sum)
          a = i*self._ctx_num_classes
          b = (i+1)*self._ctx_num_classes
          _label = self.global_label - self._ctx_class_start[i]
          _label = self.get_ndarray2(fc7_outs[i].context, 'label', _label)
          onehot_label = self.get_ndarray(fc7_outs[i].context, 'label_onehot', (self._batch_size, self._ctx_num_classes))
          nd.one_hot(_label, depth=self._ctx_num_classes, on_value = 1.0, off_value = 0.0, out=onehot_label)
          #print(fc7_outs[i].shape, onehot_label.shape)

          if self._iter%celoss_verbose==0:
            _ce_loss = fc7_outs[i] * onehot_label
            _ce_loss = nd.sum(_ce_loss, axis=1)
            fc7_celoss += _ce_loss.as_in_context(tmp_ctx)
          fc7_outs[i] -= onehot_label

          out = arcface_module_outputs[i]
          out_grads = [fc7_outs[i]]
          for j in range(1, len(out)):
              eloss = out[j]
              #print('eloss%d:'%j, eloss.shape)
              #print(out_grads[0].shape)
              #egrad_shape = (out_grads[0].shape[0], eloss.shape[0])
              egrad_shape = eloss.shape
              egrad = self.get_ndarray(fc7_outs[i].context, 'egrad%d'%j, egrad_shape)
              #egrad[:][:] = 1.0/egrad_shape[0]
              egrad[:] = 1.0
              out_grads.append(egrad)
              if self._iter%self._verbose==0:
                  total_eloss.append(np.mean(eloss.asnumpy()))

          _module.backward(out_grads = out_grads)
          #ctx_fc1_grad = _module.get_input_grads()[0].as_in_context(mx.cpu())
          ctx_fc1_grad = self.get_ndarray2(fc1_grad_ctx, 'ctx_fc1_grad_%d'%i, _module.get_input_grads()[0])
          local_fc1_grad += ctx_fc1_grad

        if self._iter%self._verbose==0 and len(total_eloss)>0:
          print('{eloss}', self._iter, np.mean(total_eloss))
        #if self._iter%self._verbose==0:
        if self._iter%celoss_verbose==0:
          ce_loss = nd.log(fc7_celoss) * -1.0
          ce_loss = nd.mean(ce_loss)
          print('CELOSS,%d,%f' % (self._iter, ce_loss.asscalar()))

        global_fc1_grad = local_fc1_grad
        self._curr_module.backward(out_grads = [global_fc1_grad])
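The per-context label handling above (`self.global_label - self._ctx_class_start[i]` followed by `nd.one_hot`) relies on out-of-range indices producing an all-zero one-hot row, so only the module that owns a sample's class subtracts 1 from its probability row. A minimal sketch of that behaviour with hypothetical shard sizes:

from mxnet import nd

ctx_num_classes = 4
global_label = nd.array([1, 5, 9])                 # 12 classes split over 3 shards of 4

for class_start in (0, 4, 8):
    local = global_label - class_start             # negative / too large on foreign shards
    onehot = nd.one_hot(local, depth=ctx_num_classes, on_value=1.0, off_value=0.0)
    print(class_start, onehot.asnumpy())           # only the owning shard gets a 1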
Example #5
def ISSM(z, b, F, a, g, sigma, m_prior, S_prior):
    '''
    The documentation for this code can be found in :
    https://gluon.mxnet.io/chapter12_time-series/issm-scratch.html
    '''

    H = F.shape[0] # dim of latent state
    T = z.shape[0] # num of observations

    eye_h = nd.array(np.eye(H))

    mu_seq = []
    S_seq = []
    log_p_seq = []

    for t in range(T):

        if t == 0:
            # At the first time step, use the prior
            mu_h = m_prior
            S_hh = S_prior
        else:
            # Otherwise compute using update eqns.
            F_t = F[:, :, t]
            g_t = g[:, t].reshape((H,1))

            mu_h = gemm2(F_t, mu_t)
            S_hh = gemm2(F_t, gemm2(S_t, F_t, transpose_b=1)) + \
                   gemm2(g_t, g_t, transpose_b=1)

        a_t = a[:, t].reshape((H,1))
        mu_v = gemm2(mu_h, a_t, transpose_a=1)

        # Compute the Kalman gain (vector)
        S_hh_x_a_t = gemm2(S_hh, a_t)

        sigma_t = sigma[t]
        S_vv = gemm2(a_t, S_hh_x_a_t, transpose_a=1) + nd.square(sigma_t)
        kalman_gain = nd.broadcast_div(S_hh_x_a_t, S_vv)

        # Compute the error (delta)
        delta = z[t] - b[t] - mu_v

        # Filtered estimates
        mu_t = mu_h + gemm2(kalman_gain, delta)

        # Joseph's symmetrized update for covariance:
        ImKa = nd.broadcast_sub(eye_h, gemm2(kalman_gain, a_t, transpose_b=1))
        S_t = gemm2(gemm2(ImKa, S_hh), ImKa, transpose_b=1) + \
                nd.broadcast_mul(gemm2(kalman_gain, kalman_gain, transpose_b=1), nd.square(sigma_t))

        # likelihood term
        log_p = (-0.5 * (delta * delta / S_vv
                         + np.log(2.0 * np.pi)
                         + nd.log(S_vv))
                 )

        mu_seq.append(mu_t)
        S_seq.append(S_t)
        log_p_seq.append(log_p)


    return log_p_seq
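A toy usage sketch for `ISSM` (an assumption, not part of the tutorial): a one-dimensional local-level model, so the transition `F_t` and the emission `a_t` are both the scalar 1. It assumes `gemm2` is available in the snippet's scope, e.g. via `from mxnet.ndarray.linalg import gemm2`.

import numpy as np
from mxnet import nd

T, H = 50, 1
rng = np.random.RandomState(0)
z = nd.array(np.cumsum(rng.randn(T)) + 0.5 * rng.randn(T))  # noisy random walk
b = nd.zeros(T)                        # no deterministic offset
F = nd.ones((H, H, T))                 # identity transition at every step
a = nd.ones((H, T))                    # emission reads the single latent state
g = 0.3 * nd.ones((H, T))              # innovation strength
sigma = 0.5 * nd.ones(T)               # observation noise std
m_prior = nd.zeros((H, 1))
S_prior = nd.ones((H, H))

log_p_seq = ISSM(z, b, F, a, g, sigma, m_prior, S_prior)
print("mean log-likelihood:", nd.mean(nd.concat(*log_p_seq, dim=0)).asscalar())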
Example #6
    def backward(self, out_grads=None):
        #print('in backward')
        assert self.binded and self.params_initialized
        #tmp_ctx = self._ctx_cpu
        tmp_ctx = self._ctx_single_gpu
        fc7_outs = []
        ctx_fc7_max = self.get_ndarray(tmp_ctx, 'ctx_fc7_max', (self._batch_size, len(self._context)))
        #local_fc7_max = nd.zeros( (self.global_label.shape[0],1), ctx=mx.cpu())
        for i, _module in enumerate(self._arcface_modules):
          _fc7 = _module.get_outputs(merge_multi_context=True)[0]
          fc7_outs.append(_fc7)
          _fc7_max = nd.max(_fc7, axis=1).as_in_context(tmp_ctx)
          ctx_fc7_max[:,i] = _fc7_max

        local_fc7_max = self.get_ndarray(tmp_ctx, 'local_fc7_max', (self._batch_size, 1))
        nd.max(ctx_fc7_max, axis=1, keepdims=True, out=local_fc7_max)
        global_fc7_max = local_fc7_max
        #local_fc7_sum = None
        local_fc7_sum = self.get_ndarray(tmp_ctx, 'local_fc7_sum', (self._batch_size,1))
        local_fc7_sum[:,:] = 0.0
        for i, _module in enumerate(self._arcface_modules):
          _max = self.get_ndarray2(fc7_outs[i].context, 'fc7_max', global_fc7_max)
          fc7_outs[i] = nd.broadcast_sub(fc7_outs[i], _max)
          fc7_outs[i] = nd.exp(fc7_outs[i])
          _sum = nd.sum(fc7_outs[i], axis=1, keepdims=True).as_in_context(tmp_ctx)
          local_fc7_sum += _sum
        global_fc7_sum = local_fc7_sum

        if self._iter%self._verbose==0:
          #_ctx = self._context[-1]
          _ctx = self._ctx_cpu
          _probs = []
          for i, _module in enumerate(self._arcface_modules):
            _prob = self.get_ndarray2(_ctx, '_fc7_prob_%d'%i, fc7_outs[i])
            _probs.append(_prob)
          fc7_prob = self.get_ndarray(_ctx, 'test_fc7_prob', (self._batch_size, self._ctx_num_classes*len(self._context)))
          nd.concat(*_probs, dim=1, out=fc7_prob)
          fc7_pred = nd.argmax(fc7_prob, axis=1)
          local_label = self.global_label - self._local_class_start
          #local_label = self.get_ndarray2(_ctx, 'test_label', local_label)
          _pred = nd.equal(fc7_pred, local_label)
          print('{fc7_acc}', self._iter, nd.mean(_pred).asnumpy()[0])


        #local_fc1_grad = []
        #fc1_grad_ctx = self._ctx_cpu
        fc1_grad_ctx = self._ctx_single_gpu
        local_fc1_grad = self.get_ndarray(fc1_grad_ctx, 'local_fc1_grad', (self._batch_size,self._emb_size))
        local_fc1_grad[:,:] = 0.0

        loss = nd.zeros(shape=(self._batch_size), ctx=self._ctx_cpu)
        for i, _module in enumerate(self._arcface_modules):
          _sum = self.get_ndarray2(fc7_outs[i].context, 'fc7_sum', global_fc7_sum)
          fc7_outs[i] = nd.broadcast_div(fc7_outs[i], _sum)
          a = i*self._ctx_num_classes
          b = (i+1)*self._ctx_num_classes
          _label = self.global_label - self._ctx_class_start[i]
          _label = self.get_ndarray2(fc7_outs[i].context, 'label', _label)
          onehot_label = self.get_ndarray(fc7_outs[i].context, 'label_onehot', (self._batch_size, self._ctx_num_classes))
          nd.one_hot(_label, depth=self._ctx_num_classes, on_value = 1.0, off_value = 0.0, out=onehot_label)
          
          #for debug
          loss -= (mx.nd.sum(mx.nd.log(fc7_outs[i]) * onehot_label, axis=1)).as_in_context(self._ctx_cpu)
          fc7_outs[i] -= onehot_label
          _module.backward(out_grads = [fc7_outs[i]])
          print('for debug, fc7 outs max is ', i, mx.nd.max(fc7_outs[i]))
          print('for debug, fc7 outs min is ', i, mx.nd.min(fc7_outs[i]))
          #ctx_fc1_grad = _module.get_input_grads()[0].as_in_context(mx.cpu())
          ctx_fc1_grad = self.get_ndarray2(fc1_grad_ctx, 'ctx_fc1_grad_%d'%i, _module.get_input_grads()[0])
          local_fc1_grad += ctx_fc1_grad
          print('for debug, global fc1_grad max is ', i, mx.nd.max(ctx_fc1_grad))
          print('for debug, ctx fc1 grad shape, ', ctx_fc1_grad.shape)

        global_fc1_grad = local_fc1_grad
        #  global_fc1_grad = mx.nd.clip(local_fc1_grad, a_min=-15, a_max=15)
        print('for debug, after clip global fc1_grad max is ', mx.nd.max(global_fc1_grad))
        self._curr_module.backward(out_grads = [global_fc1_grad])
        # for debug
        return mx.nd.sum(loss)