示例#1
0
    def viterbi_decode(self, state_feats):
        """
        Find the highest-scoring tag sequence for every sample of the batch.

        :param state_feats: shape=(batch_size, seq_len, tag_size)
        :return:
            path_score: score of the best path, shape=(batch_size,)
            best_path: tag indices of the best path, shape=(batch_size, seq_len)
        """
        # Time-major layout: every step is a (batch_size, tag_size) slice.
        emissions = state_feats.transpose((1, 0, 2))
        running_best = emissions[0]
        pointers = []

        if emissions.shape[0] > 1:
            trans = self._transitions.data()
            for emit in emissions[1:]:
                # candidate[b, cur, prev] =
                #     running_best[b, prev] + trans[prev, cur] + emit[b, cur]
                step_term = (emit.expand_dims(1) + trans).transpose((0, 2, 1))
                candidate = running_best.expand_dims(1) + step_term
                pointers.append(nd.argmax(candidate, axis=-1))
                running_best = nd.max(candidate, axis=-1)

        last_tag = nd.argmax(running_best, axis=-1)
        path_score = nd.pick(running_best, last_tag)

        # Follow the back-pointers from the final step towards the first one.
        reversed_path = [last_tag]
        for ptr in pointers[::-1]:
            reversed_path.append(nd.pick(ptr, reversed_path[-1]))

        steps = [tags.expand_dims(0) for tags in reversed_path[::-1]]
        best_path = nd.concat(*steps, dim=0).transpose()
        return path_score, best_path
示例#2
0
 def replay(self):
     """Run one Double-DQN training step on a minibatch from replay memory.

     Does nothing while the memory holds fewer than ``self.batch_size``
     transitions. Each stored transition is indexed as
     (state, action, reward, next_state, done).

     Side effects: accumulates the mean batch loss into ``self.train_loss``
     and performs one ``self.trainer`` step.
     """
     # experience replay
     if len(self.memory) < self.batch_size:
         return
     batch = random.sample(self.memory, self.batch_size)
     state_batch = nd.array([b[0] for b in batch])
     action_batch = nd.array([b[1] for b in batch])
     reward_batch = nd.array([b[2] for b in batch])
     next_state_batch = nd.array([b[3] for b in batch])
     done_batch = nd.array([1.0 if b[4] else 0.0 for b in batch])

     # Double-DQN:
     # choose the next action with the online network and read its value
     # from the target network. nd.argmax keeps the whole computation on
     # NDArrays instead of relying on NumPy dispatching into an NDArray.
     target_action_batch = nd.argmax(
         self.train_model(next_state_batch), axis=1)
     bootstrap_batch = reward_batch + self.gamma * \
         nd.pick(self.target_model(next_state_batch), target_action_batch, 1)
     # Terminal transitions get the plain reward; done in one vectorised
     # select rather than one NDArray write per sample.
     target_batch = nd.where(done_batch, reward_batch, bootstrap_batch)

     with autograd.record():
         q_batch = self.train_model(state_batch)
         output_batch = nd.pick(q_batch, action_batch, 1)
         loss = self.loss(output_batch, target_batch)
     loss.backward()
     self.train_loss += loss.mean().asscalar()
     self.trainer.step(self.batch_size)
示例#3
0
    def _viterbi_decode(self, feats):
        '''
        CRF prediction with the Viterbi algorithm: find the best-scoring tag
        path for the given features.

        feats: list whose length is the sentence length; each element is an
               nd.array holding the feature vector of one word for the whole
               batch, shape (batch_size, tagset_size)

        Returns (path_score, best_path_matrix): path_score has shape
        (batch_size,), best_path_matrix has one column per element of feats,
        i.e. shape (batch_size, len(feats)).
        '''
        backpointers = []
        batch_size = feats[0].shape[0]
        vvars = nd.full((1, self.tagset_size), -10000., ctx=self.ctx)
        vvars[0, self.tag2idx[START_TAG]] = 0
        # vvars shape: (batch_size, tagset_size)
        vvars = nd.broadcast_axis(vvars, axis=0, size=batch_size)

        for feat in feats:
            bptrs_t = []
            viterbivars_t = []

            for next_tag in range(self.tagset_size):
                # Score of reaching next_tag from every previous tag.
                next_tag_var = vvars + nd.broadcast_axis(
                    self.transitions.data()[next_tag].reshape((1, -1)),
                    axis=0,
                    size=batch_size)
                # best_tag_id shape: (batch_size, 1)
                best_tag_id = nd.argmax(next_tag_var, axis=1, keepdims=True)
                bptrs_t.append(best_tag_id)
                # every element of viterbivars_t has shape (batch_size, 1)
                viterbivars_t.append(
                    nd.pick(next_tag_var, best_tag_id, axis=1, keepdims=True))
            vvars = (nd.concat(*viterbivars_t, dim=1) + feat)
            # bptrs_t shape: (batch_size, tagset_size)
            bptrs_t = nd.concat(*bptrs_t, dim=1)
            backpointers.append(bptrs_t)

        # Transition to STOP_TAG. The row index selects the destination tag
        # (as in the loop above), so the terminal step must use the STOP_TAG
        # row; the START_TAG row scores transitions *into* START instead.
        terminal_var = vvars + self.transitions.data()[self.tag2idx[STOP_TAG]]
        best_tag_id = nd.argmax(terminal_var, axis=1)
        # path_score shape: (batch_size,)
        path_score = nd.pick(terminal_var, best_tag_id, axis=1)

        # Decode the best path by following the back-pointers.
        best_path = [best_tag_id]
        for bptrs_t in reversed(backpointers):
            best_tag_id = nd.pick(bptrs_t, best_tag_id, axis=1)
            best_path.append(best_tag_id)
        # Remove the start symbol.
        # start shape: (batch_size,)
        start = best_path.pop()
        # Sanity check: every decoded path must begin at the start symbol.
        for i in range(batch_size):
            assert start[i].asscalar() == self.tag2idx[START_TAG]
        best_path.reverse()

        # Build the best-path matrix, one column per step.
        new_best_path = []
        for best_tag_id in best_path:
            best_tag_id = best_tag_id.reshape((-1, 1))
            new_best_path.append(best_tag_id)
        best_path_matrix = nd.concat(*new_best_path, dim=1)

        return path_score, best_path_matrix
示例#4
0
    def forward(self, pred, target, mask):
        """Return the masked loss and masked accuracy of ``pred`` vs ``target``.

        ``target`` and ``mask`` are cut to ``pred.shape[1]`` columns, then all
        three inputs are flattened so that every row is one position.
        The loss is the negated value of ``pred`` at the target index,
        averaged over the positions where ``mask`` is set; the accuracy is
        the masked fraction of positions whose argmax equals the target.
        (Example shapes noted by the original author: target (8, 30),
        pred (240, 22) after flattening.)
        """
        # Work on copies so the caller's arrays are left untouched.
        pred = pred.copy()
        target = target.copy()
        mask = mask.copy()

        # truncate to the same size, then flatten to one position per row
        steps = pred.shape[1]
        target = target[:, :steps].reshape(-1, 1)
        mask = mask[:, :steps].reshape(-1, 1)
        pred = pred.reshape(-1, pred.shape[2])

        # compute loss
        picked = nd.pick(pred, target).expand_dims(axis=1)
        loss = nd.sum(-picked * mask) / nd.sum(mask)

        # compute accuracy
        predicted = nd.argmax(pred, axis=1).astype('int64')
        hits = (predicted == nd.squeeze(target)).astype('float32')
        accuracy = nd.sum(hits * nd.squeeze(mask)) / nd.sum(mask)
        return loss, accuracy
示例#5
0
 def forward(self, output, label):
     """Focal loss: ``-alpha * (1 - pt) ** gamma * log(pt)``.

     ``pt`` is the softmax probability of the labelled class. The softmax is
     taken along ``self._axis``, the same axis the label is picked from, so
     the two stay consistent when the class axis is not the last one.
     The result is averaged over every axis except ``self._batch_axis``.
     """
     output = nd.softmax(output, axis=self._axis)
     pt = nd.pick(output, label, axis=self._axis, keepdims=True)
     loss = -self._alpha * ((1 - pt)**self._gamma) * nd.log(pt)
     return nd.mean(loss, axis=self._batch_axis, exclude=True)
示例#6
0
    def train_policy_net(self, imgs, actions, rs, terminals):
        """
        Run one optimisation step of the policy network on one batch.

        Arguments:

        imgs - b x (f + 1) x C x H x W numpy array, where b is batch size,
               f is num frames, H is height and W is width.
        actions - b x 1 numpy array of integers
        rs - b x 1 numpy array of rewards
        terminals - b x 1 numpy boolean array; scales the bootstrapped
                    next-state value by (1 - terminal)

        Returns: mean loss over the batch
        """
        n_samples = actions.shape[0]

        # Frames [0, f) form the state, frames [1, f] the successor state.
        cur_frames = imgs[:, :-1, :, :, :]
        nxt_frames = imgs[:, 1:, :, :, :]
        dims = cur_frames.shape
        stacked_shape = (dims[0], -1, dims[-2], dims[-1])  # batch x (f x C) x H x W
        cur_frames = cur_frames.reshape(stacked_shape)
        nxt_frames = nxt_frames.reshape(stacked_shape)

        # Move everything to the device; pixel values are divided by 255.
        st = nd.array(cur_frames, ctx=self.ctx, dtype=np.float32) / 255.0
        st1 = nd.array(nxt_frames, ctx=self.ctx, dtype=np.float32) / 255.0
        at = nd.array(actions[:, 0], ctx=self.ctx)
        rt = nd.array(rs[:, 0], ctx=self.ctx)
        tt = nd.array(terminals[:, 0], ctx=self.ctx)

        # Target is computed outside autograd.record(), so it carries no
        # gradient.
        best_next_q = nd.max(self.target_net(st1), axis=1)
        target = rt + best_next_q * (1.0 - tt) * DISCOUNT

        with autograd.record():
            q_values = self.policy_net(st)
            chosen_q = nd.pick(q_values, at, 1)
            loss = self.loss_func(target, chosen_q)

        loss.backward()

        # Gradient clipping
        if GRAD_CLIPPING_THETA is not None:
            param_arrays = [
                param.data()
                for param in self.policy_net.collect_params().values()
            ]
            g_utils.grad_clipping(param_arrays, GRAD_CLIPPING_THETA, self.ctx)

        self.trainer.step(n_samples)
        return loss.mean().asscalar()
示例#7
0
    def update(self):
        """Update actor and critic once from the stored rollout, then clear it.

        The actor loss is ``-log(prob of taken action) * advantage`` with the
        advantage ``total_reward - value`` detached, so it does not
        back-propagate into the critic. The critic is fitted to
        ``total_reward`` with an L2 loss.
        """
        ctx = self.ctx
        states = nd.array(self.states, ctx=ctx)
        actions = nd.array(self.actions, ctx=ctx)
        total_reward = nd.array(self.total_reward, ctx=ctx)
        n_samples = len(states)

        # ------------optimize actor-----------
        with autograd.record():
            values = self.critic_network(states)
            probs = self.actor_network(states)
            advantages = (total_reward - values).detach()
            actor_loss = -nd.pick(probs, actions).log() * advantages
        self.actor_network.collect_params().zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step(batch_size=n_samples)

        # -----------optimize critic------------
        l2_loss = gloss.L2Loss()
        with autograd.record():
            critic_loss = l2_loss(self.critic_network(states), total_reward)
        self.critic_network.collect_params().zero_grad()
        critic_loss.backward()
        self.critic_optimizer.step(batch_size=n_samples)

        # Drop the consumed rollout.
        self.states, self.actions, self.rewards = [], [], []
        self.dones, self.next_states, self.total_reward = [], [], []
    def hybrid_forward(self, F, output, *args, **kwargs):
        '''
        Returns the Softmax Cross Entropy loss of a model with a graph vocab, in the style of a sentinel pointer network
        Note: Unlike VarNamingLoss, this Loss DOES expect the last dimension of output to be probabilities summing to 1

        args must unpack as ((label, _), data_encoder); label provides
        `.values` (indices picked along axis 2 of output) and `.value_lengths`.
        '''
        (label, _), _encoder = args
        joint_label = label.values
        label_lengths = label.value_lengths

        # pick (rather than sparse-label XEnt) because there can be multiple ways to point to the correct subtoken
        picked = nd.pick(output, joint_label, axis=2)

        # Mask positions beyond max(predicted length, label length), where the
        # predicted length ends at the first position whose argmax is 0.
        predicted = nd.argmax(output, axis=2).asnumpy()
        full_length = output.shape[1]
        first_zero_hits = (np.where(row == 0)[0] for row in predicted)
        output_lengths = [
            int(hits[0]) + 1 if hits.size else full_length
            for hits in first_zero_hits
        ]
        output_lengths = nd.array(output_lengths, ctx=output.context)
        mask_lengths = nd.maximum(output_lengths, label_lengths)

        # Masked positions are set to 1.0 so that -log(.) contributes 0.
        masked = nd.SequenceMask(picked,
                                 value=1.0,
                                 use_sequence_length=True,
                                 sequence_length=mask_lengths,
                                 axis=1)

        return nd.mean(-nd.log(masked), axis=0, exclude=True)
示例#9
0
 def choose_action(self, state):
     """Sample an action for a single state from the actor's distribution.

     Returns ``(action, action_prob)``: the sampled action as a Python int
     and, as a NumPy array, the actor's output for that action.
     """
     # Wrap the state in a list so the network sees a batch of one.
     state = nd.array([state], ctx=self.ctx)
     all_action_prob = self.actor_network(state)
     action = nd.sample_multinomial(all_action_prob)
     action_prob = nd.pick(all_action_prob, action, axis=1).asnumpy()
     # asscalar() extracts the single element explicitly; int() on a
     # 1-element NumPy array is deprecated.
     action = int(action.asscalar())
     return action, action_prob
示例#10
0
    def step(self, s, num_steps_so_far):
        """Choose epsilon-greedy actions for a batch of observations.

        ``s`` is a channel-last (NHWC) batch; it is transposed to NCHW before
        being fed to ``self.net``. Epsilon is
        ``1 - num_steps_so_far / args.annealing_end``, floored at
        ``args.epsilon_min``.

        Returns ``(value, action, logpac)`` as flat NumPy arrays, where
        ``logpac`` is the network's second output picked at the chosen action.
        """
        # s is channel-last, NHWC
        s = np.transpose(s, (0, 3, 1, 2))
        s = nd.array(s, ctx=self.args.ctx)

        value, logits = self.net(s)

        # Epsilon greedy exploration
        eps = np.maximum(1. - num_steps_so_far / self.args.annealing_end,
                         self.args.epsilon_min)
        action = nd.empty(shape=s.shape[0], ctx=self.args.ctx)
        # Copy the logits to host memory once; the greedy branch reads rows
        # of this NumPy copy instead of running np.argmax on NDArray slices.
        logits_np = logits.asnumpy()
        for i in range(s.shape[0]):
            sample = np.random.random()
            if sample < eps:
                ac = random.randint(0, self.action_dim - 1)
            else:
                ac = int(np.argmax(logits_np[i]))
            action[i] = ac

        # Pick the network output of the chosen action
        logpac = nd.pick(logits, action, 1)

        # Reshaping the output
        logpac = logpac.asnumpy().reshape((-1))
        action = action.asnumpy().astype(np.int32).reshape((-1))
        value = value.asnumpy().reshape((-1))

        return value, action, logpac
示例#11
0
 def forward(self, x):
     """Return ``tanh((x[:, dim] - split) * relu(sharpness))`` per row.

     One entry per row of ``x`` is picked at the index held by ``self._dim``
     (kept as a trailing axis), shifted by ``self._split`` and scaled by the
     rectified ``self._sharpness``.
     """
     x = nd.pick(x,
                 nd.broadcast_to(self._dim.data(), x.shape[0]),
                 keepdims=True)
     # Out-of-place arithmetic: in-place updates (-=, *=) of an array that
     # is part of a recorded graph are rejected under autograd.record().
     x = x - self._split.data()
     x = x * nd.relu(self._sharpness.data())
     return nd.tanh(x)
示例#12
0
文件: box.py 项目: ydlstartx/MultiDet
def label_offset(anchors, bbox, match, sample,
                 means=(0,0,0,0), stds=(0.1,0.1,0.2,0.2), flatten=True):
    """Encode the ground-truth box matched to each anchor as a regression target.

    anchors is reshaped to (N, 4); bbox is (B, M, 4); match selects, for each
    anchor, an index along the M axis; sample marks the anchors to keep
    (values > 0.5). Centre offsets are divided by the anchor size, size
    offsets are log-ratios, and both are normalised with means / stds.

    Returns (anchor_mask, anchor_offset), zero wherever sample <= 0.5, with
    shape (B, N, 4), or (B, N * 4) when flatten is true.
    """
    anchors = anchors.reshape((-1, 4))
    num_anchors = anchors.shape[0]
    batch, num_gt, _ = bbox.shape
    a_cx, a_cy, a_w, a_h = corner_to_center(anchors, split=True)

    # Tile the boxes per anchor, then gather the matched box coordinate-wise.
    tiled = nd.broadcast_to(bbox.reshape((batch, 1, num_gt, 4)),
                            (batch, num_anchors, num_gt, 4))
    coords = [nd.pick(tiled[:, :, :, k], match) for k in range(4)]
    matched = nd.stack(*coords, axis=-1)
    g_cx, g_cy, g_w, g_h = corner_to_center(matched, split=True)

    dx = ((g_cx - a_cx) / a_w - means[0]) / stds[0]
    dy = ((g_cy - a_cy) / a_h - means[1]) / stds[1]
    dw = (nd.log(g_w / a_w) - means[2]) / stds[2]
    dh = (nd.log(g_h / a_h) - means[3]) / stds[3]
    offset = nd.concat(dx, dy, dw, dh, dim=-1)

    keep = nd.broadcast_to(sample.reshape((batch, num_anchors, 1)),
                           (batch, num_anchors, 4)) > 0.5
    zeros = nd.zeros_like(offset)
    anchor_offset = nd.where(keep, offset, zeros)
    anchor_mask = nd.where(keep, nd.ones_like(offset), zeros)

    if flatten:
        anchor_offset = anchor_offset.reshape((batch, -1))
        anchor_mask = anchor_mask.reshape((batch, -1))

    return anchor_mask, anchor_offset
def cross_entropy(y_hat, y):
    """
    Cross-entropy loss function.
    :param y_hat: array the label entry is picked from (last axis)
    :param y: label indices
    :return: negative log of the picked entries
    """
    picked = nd.pick(y_hat, y)
    return -nd.log(picked)
示例#14
0
    def forward(self, x, crisp=False):
        """Return ``sigmoid(x[:, dim] - split)``, scaled by relu(sharpness) first unless crisp.

        One entry per row of ``x`` is picked at the index held by
        ``self._dim`` (kept as a trailing axis) and shifted by ``self._split``.
        The rectified ``self._sharpness`` factor is applied only when
        ``crisp`` compares equal to False.
        """
        selected = nd.pick(
            x, nd.broadcast_to(self._dim.data(), x.shape[0]), keepdims=True)
        shifted = selected - self._split.data()
        # Equality test (not truthiness) is kept so that values such as None
        # are still treated as "crisp".
        if crisp == False:
            shifted = shifted * nd.relu(self._sharpness.data())
        return nd.sigmoid(shifted)
示例#15
0
    def forward(self, cls_pred, box_pred, cls_target, box_target):
        """Compute loss in entire batch across devices.

        Each argument is an array or a list of arrays (one per device).
        The classification loss is cross entropy over the positives
        (``cls_target > 0``) plus the hardest negatives, up to
        ``self._negative_mining_ratio`` per positive; the box loss is a
        smooth-L1 with threshold ``self._rho`` over positives only. Both are
        divided by the number of positives in the whole batch.

        Returns three lists ``(sum_losses, cls_losses, box_losses)`` with one
        entry per device.
        """
        # require results across different devices at this time
        cls_pred, box_pred, cls_target, box_target = [
            _as_list(x) for x in (cls_pred, box_pred, cls_target, box_target)]
        # cross device reduction to obtain positive samples in entire batch
        num_pos_all = sum((ct > 0).sum().asscalar() for ct in cls_target)
        if num_pos_all < 1:
            # No positive samples found: return zero losses derived from the
            # predictions, so they live on the right context, stay part of
            # the recorded graph (backward() still works) and keep the same
            # list-per-device structure as the regular return value.
            cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
            box_losses = [nd.sum(bp * 0) for bp in box_pred]
            sum_losses = [
                nd.sum(cp * 0) + nd.sum(bp * 0)
                for cp, bp in zip(cls_pred, box_pred)
            ]
            return sum_losses, cls_losses, box_losses

        # compute element-wise cross entropy loss and sort, then perform
        # negative mining
        cls_losses = []
        box_losses = []
        sum_losses = []
        for cp, bp, ct, bt in zip(
                *[cls_pred, box_pred, cls_target, box_target]):
            pred = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
            # (pos - 1) is -1 for negatives and 0 for positives, so the
            # double argsort ranks negatives by descending loss.
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < (
                pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            cls_loss = nd.where(
                (pos + hard_negative) > 0,
                cls_loss,
                nd.zeros_like(cls_loss))
            cls_losses.append(
                nd.sum(cls_loss, axis=0, exclude=True) / num_pos_all)

            bp = _reshape_like(nd, bp, bt)
            box_loss = nd.abs(bp - bt)
            box_loss = nd.where(
                box_loss > self._rho,
                box_loss - 0.5 * self._rho,
                (0.5 / self._rho) * nd.square(box_loss))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(
                nd.sum(box_loss, axis=0, exclude=True) / num_pos_all)
            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

        return sum_losses, cls_losses, box_losses
示例#16
0
    def _seq_score(self, state_feats, tag_seq):
        """
        Score a given tag sequence: the sum of the emission score of every
        step plus the transition score between consecutive tags.

        :param state_feats: shape=(batch_size, seq_len, tag_size)
        :param tag_seq: shape=(batch_size, seq_len)
        :return: shape=(batch_size,)
        """
        # emissions: shape=(seq_len, batch_size, tag_size)
        emissions = state_feats.transpose((1, 0, 2))
        # tags: shape=(seq_len, batch_size)
        tags = tag_seq.transpose()

        score = nd.pick(emissions[0], tags[0], axis=1)
        for t in range(1, emissions.shape[0]):
            emit_score = nd.pick(emissions[t], tags[t], axis=1)
            # Rows of the transition matrix selected by the previous tag,
            # then the entry of the current tag within each row.
            trans_rows = self._transitions.data()[tags[t - 1]]
            trans_score = nd.pick(trans_rows, tags[t], axis=1)
            score = score + emit_score + trans_score

        return score
示例#17
0
 def hybrid_forward(self,F,pred,label,alpha=1.0,gamma=0,weight=1.0):
     """Focal-style softmax cross entropy: ``-alpha * (1 - p) ** gamma * log(p)``.

     ``p`` is the softmax probability picked at ``label`` along ``self.axis``;
     the result is averaged along ``self.axis``. ``weight`` is accepted but
     not used.
     """
     # Softmax over the last dimension.
     pred_prob = F.softmax(pred)
     # Select the probability of the labelled class. F.pick (not nd.pick)
     # keeps the block usable with both NDArray and Symbol inputs, i.e.
     # after hybridize().
     output = F.pick(pred_prob,label,axis=self.axis)
     # Compute the loss.
     loss = F.mean((-alpha*((1-output)**gamma)*output.log()),axis=self.axis)
     return loss
示例#18
0
    def forward(self, cls_pred, box_pred, cls_target, box_target):
        """Compute loss in entire batch across devices.

        Each argument is an array or a list of arrays (one per device).
        Returns three lists ``(sum_losses, cls_losses, box_losses)`` with one
        entry per device. With no positives and ``self._min_hard_negatives``
        below 1, zero losses derived from the predictions are returned.
        """
        # require results across different devices at this time
        cls_pred = _as_list(cls_pred)
        box_pred = _as_list(box_pred)
        cls_target = _as_list(cls_target)
        box_target = _as_list(box_target)

        # cross device reduction to obtain positive samples in entire batch
        num_pos_all = sum((ct > 0).sum().asscalar() for ct in cls_target)
        if num_pos_all < 1 and self._min_hard_negatives < 1:
            # no positive samples and no hard negatives, return dummy losses
            zero_cls = [nd.sum(cp * 0) for cp in cls_pred]
            zero_box = [nd.sum(bp * 0) for bp in box_pred]
            zero_sum = [
                nd.sum(cp * 0) + nd.sum(bp * 0)
                for cp, bp in zip(cls_pred, box_pred)
            ]
            return zero_sum, zero_cls, zero_box

        # Never divide by less than one positive.
        normalizer = max(1., num_pos_all)
        rho = self._rho

        # compute element-wise cross entropy loss and sort, then perform negative mining
        sum_losses, cls_losses, box_losses = [], [], []
        for cp, bp, ct, bt in zip(cls_pred, box_pred, cls_target, box_target):
            log_prob = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(log_prob, ct, axis=-1, keepdims=False)
            # (pos - 1) is -1 for negatives and 0 for positives, so the
            # double argsort ranks negatives by descending loss.
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            num_hard = nd.maximum(
                self._min_hard_negatives,
                pos.sum(axis=1) * self._negative_mining_ratio)
            hard_negative = rank < num_hard.expand_dims(-1)
            # mask out if not positive or negative
            selected = (pos + hard_negative) > 0
            cls_loss = nd.where(selected, cls_loss, nd.zeros_like(cls_loss))
            cls_losses.append(
                nd.sum(cls_loss, axis=0, exclude=True) / normalizer)

            bp = _reshape_like(nd, bp, bt)
            abs_err = nd.abs(bp - bt)
            box_loss = nd.where(abs_err > rho,
                                abs_err - 0.5 * rho,
                                (0.5 / rho) * nd.square(abs_err))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(
                nd.sum(box_loss, axis=0, exclude=True) / normalizer)

            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

        return sum_losses, cls_losses, box_losses
示例#19
0
def cross_entropy(y_hat, y):
    """Return the negative log of the entries of ``y_hat`` selected by ``y``."""
    # Pick the predicted probability of the true class for each sample.
    true_class_prob = nd.pick(y_hat, y)

    # ln(1/p) = -ln(p): a probability of 1 for the true class gives a loss
    # of 0 (log here is the natural logarithm).
    return -nd.log(true_class_prob)
示例#20
0
文件: loss.py 项目: xcgoner/gluon-exp
    def forward(self, cls_pred, box_pred, cls_target, box_target):
        """Compute loss in entire batch across devices.

        Each argument is an array or a list of arrays (one per device).
        When ``self._distributed`` is set, the number of positive samples is
        additionally summed across machines through ``self._kv_store``.

        Returns three lists ``(sum_losses, cls_losses, box_losses)`` with one
        entry per device.
        """
        # require results across different devices at this time
        cls_pred, box_pred, cls_target, box_target = [_as_list(x) \
            for x in (cls_pred, box_pred, cls_target, box_target)]
        # cross device reduction to obtain positive samples in entire batch
        num_pos_all = sum((ct > 0).sum().asscalar() for ct in cls_target)
        # synchronize across different machines
        if self._distributed:
            num_pos_out = nd.zeros(1, mx.cpu())
            num_pos_in = nd.zeros(1, mx.cpu()) + num_pos_all
            # allreduce only supports pushpull
            if 'allreduce' in self._kv_store_type:
                self._kv_store.pushpull(self._num_pos_key, num_pos_in, num_pos_out)
            else:
                self._kv_store.push(self._num_pos_key, num_pos_in)
                self._kv_store.pull(self._num_pos_key, out=num_pos_out)
            num_pos_all = num_pos_out.asscalar()
        if num_pos_all < 1:
            # No positive samples found: return zero losses derived from the
            # predictions, so they live on the right context, stay part of
            # the recorded graph (backward() still works) and keep the same
            # list-per-device structure as the regular return value.
            cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
            box_losses = [nd.sum(bp * 0) for bp in box_pred]
            sum_losses = [
                nd.sum(cp * 0) + nd.sum(bp * 0)
                for cp, bp in zip(cls_pred, box_pred)
            ]
            return sum_losses, cls_losses, box_losses

        # compute element-wise cross entropy loss and sort, then perform negative mining
        cls_losses = []
        box_losses = []
        sum_losses = []
        for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
            pred = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
            # (pos - 1) is -1 for negatives and 0 for positives, so the
            # double argsort ranks negatives by descending loss.
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < (pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            cls_loss = nd.where((pos + hard_negative) > 0, cls_loss, nd.zeros_like(cls_loss))
            cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / num_pos_all)

            bp = _reshape_like(nd, bp, bt)
            box_loss = nd.abs(bp - bt)
            box_loss = nd.where(box_loss > self._rho, box_loss - 0.5 * self._rho,
                                (0.5 / self._rho) * nd.square(box_loss))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / num_pos_all)
            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

        return sum_losses, cls_losses, box_losses
示例#21
0
    def _score_sentence(self, feats, tags):
        '''
        Compute the score of a given tag sequence.

        feats: list whose length is the sentence length; each element is an
               nd.array holding the feature vector of one word for the whole
               batch, shape (batch_size, tagset_size)
        tags: list whose length is the sentence length; each element is an
              nd.array holding the tag index of one word for the whole batch,
              shape (batch_size, 1)

        Returns an nd.array of shape (batch_size,).
        '''
        batch_size = feats[0].shape[0]
        # The accumulator starts at zero so the result is exactly the sum of
        # transition and emission scores (starting from ones shifted every
        # sequence score by a constant +1).
        score = nd.zeros((batch_size, ), ctx=self.ctx)

        # Column of start-tag indices for the batch, shape (batch_size, 1)
        temp = nd.array([self.tag2idx[START_TAG]] * batch_size,
                        ctx=self.ctx).reshape((batch_size, 1))
        # Prepend it; result shape: (batch_size, max_seq_len + 1)
        tags = nd.concat(temp, *tags, dim=1)

        for i, feat in enumerate(feats):
            # transition (previous tag -> current tag) + emission of current tag
            score = score + nd.pick(self.transitions.data()[tags[:, i + 1]], tags[:, i], axis=1) + \
                nd.pick(feat, tags[:, i + 1], axis=1)
        # Final transition from the last tag to STOP_TAG.
        score = score + self.transitions.data()[self.tag2idx[STOP_TAG],
                                                tags[:, tags.shape[1] - 1]]
        return score
示例#22
0
        def update_score(data, states):
            """Add one step's transition and emission score to the running score.

            Args:
                data (NDArray): features of the current step; indexed below as
                    (batch_size, self.tagset_size)
                states (list of NDArray): [idx, tags, score] where idx is a
                    one-element step counter, tags holds the tag indices with
                    one column per step, and score is the running score

            Returns:
                tuple: (data unchanged, [idx + 1, tags, updated score])
            """
            # feat shape: (batch_size, self.tagset_size)
            feat = data
            # tag shape:(batch_size, 1)
            idx, tags_iner, score = states
            # Read the step counter back as a Python int for column indexing.
            i = int(idx.asscalar())
            # transition score between the tags in columns i and i + 1, plus
            # the emission score of the tag in column i + 1
            score = score + nd.pick(self.transitions.data()[tags_iner[:, i + 1]],
                                    tags_iner[:, i], axis=1) + nd.pick(feat, tags_iner[:, i + 1],
                                                                       axis=1)
            # In-place increment: the counter array passed in is updated too.
            idx += 1

            # NOTE(review): `tags` is the enclosing-scope variable, not the
            # unpacked `tags_iner` -- presumably the same array; confirm.
            return feat, [idx, tags, score]
示例#23
0
    def update(self):
        """Run one Double-DQN update on a batch sampled from the replay buffer.

        The next action is chosen by the main network on the *next* states
        and evaluated by the target network; only the main network receives
        gradients.
        """
        state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.replay_buffer.sample(self.batch_size)
        with autograd.record():
            # get the Q(s,a)
            all_current_q_value = self.main_network(state_batch)
            main_q_value = nd.pick(all_current_q_value, action_batch)

            # different from DQN
            # get next action from main network, then get its Q value from target network
            all_next_q_value = self.target_network(next_state_batch).detach()  # only get gradient of main network
            # The arg-max must be taken over Q(next_state, .); using the
            # current-state Q-values would select the action for the wrong
            # state. Detached so it contributes no gradient.
            next_q_from_main = self.main_network(next_state_batch).detach()
            max_action = nd.argmax(next_q_from_main, axis=1)
            target_q_value = nd.pick(all_next_q_value, max_action).detach()

            # (1 - done) removes the bootstrap term on terminal transitions.
            target_q_value = reward_batch + (1 - done_batch) * self.gamma * target_q_value

            # record loss
            loss = gloss.L2Loss()
            value_loss = loss(target_q_value, main_q_value)
        self.main_network.collect_params().zero_grad()
        value_loss.backward()
        self.optimizer.step(batch_size=self.batch_size)
示例#24
0
文件: box.py 项目: ydlstartx/MultiDet
def label_box_cls(match, sample, gt_cls, ignore_label=-1):
    """Build per-anchor class labels from matched ground-truth classes.

    Args:
        match: 2-D array (B, N) of indices into the last axis of ``gt_cls``.
        sample: array compared element-wise against +/-0.5; presumably
            +1 / -1 / 0 flags for positive / negative / ignored anchors
            (TODO confirm against the sampler).
        gt_cls: 2-D array (B, M) of ground-truth class ids.
        ignore_label: value written where ``sample`` is neither above 0.5
            nor below -0.5.

    Returns:
        tuple: ``(label_cls, label_mask)``, both (B, N). ``label_cls`` is the
        matched class id plus one where ``sample > 0.5``, zero where
        ``sample < -0.5`` and ``ignore_label`` elsewhere. ``label_mask`` is
        ``label_cls > -0.5``.
    """
    _, num_anchor = match.shape
    batch, num_gt = gt_cls.shape

    # Repeat the ground-truth classes for every anchor: (B, M) -> (B, N, M).
    tiled_cls = nd.broadcast_to(gt_cls.reshape((batch, 1, num_gt)),
                                (batch, num_anchor, num_gt))

    # Matched class id, shifted by one so that 0 is free for the second rule.
    matched_cls = nd.pick(tiled_cls, match, axis=-1) + 1
    ignored = nd.ones_like(matched_cls) * ignore_label

    label_cls = nd.where(sample > 0.5, matched_cls, ignored)
    background = nd.zeros_like(label_cls)
    label_cls = nd.where(sample < -0.5, background, label_cls)

    label_mask = label_cls > -0.5
    return label_cls, label_mask
示例#25
0
    def update(self, obs, returns, masks, actions, values, logpacs, lrnow,
               cliprange_now):
        """Take one clipped-surrogate policy/value gradient step.

        Args:
            obs: observation batch; axes are permuted with (0, 3, 1, 2)
                before being fed to the network (presumably NHWC -> NCHW).
            returns: return targets for the value head.
            masks: not used by this method.
            actions: actions taken, used as indices into the network's
                second output along axis 1.
            values: value predictions recorded at rollout time.
            logpacs: log-probabilities of ``actions`` recorded at rollout time.
            lrnow: not used by this method (the learning-rate update is
                disabled).
            cliprange_now: clipping range applied to both the value delta
                and the probability ratio.

        Returns:
            tuple: ``(actor_loss, vf_loss)`` as Python scalars.

        Notes:
            The optimised loss is ``vf_loss + actor_loss``; no value
            coefficient and no entropy term are applied.
        """
        ctx = self.args.ctx

        # Advantages are normalised on the host before being moved to ctx.
        raw_adv = returns - values
        norm_adv = (raw_adv - raw_adv.mean()) / (raw_adv.std() + 1e-8)
        adv_nd = nd.array(norm_adv, ctx=ctx)

        def to_column(array):
            # Move to ctx and shape as a single column.
            return nd.array(array, ctx=ctx).reshape((-1, 1))

        obs_nd = nd.array(np.transpose(obs, (0, 3, 1, 2)), ctx=ctx)
        action_col = to_column(actions)
        value_col = to_column(values)
        return_col = to_column(returns)
        old_log_prob = to_column(logpacs)

        with autograd.record():
            vpred, logits = self.net(obs_nd)

            # Value loss: the worse of the clipped and unclipped squared error.
            value_delta = nd.clip(vpred - value_col, -cliprange_now,
                                  cliprange_now)
            vpred_clipped = value_col + value_delta
            unclipped_err = nd.square(vpred - return_col)
            clipped_err = nd.square(vpred_clipped - return_col)
            vf_loss = nd.mean(nd.maximum(unclipped_err, clipped_err))

            # Policy loss: clipped probability-ratio surrogate.
            # NOTE(review): the picked entries of `logits` are used directly
            # as log-probabilities, and the picked tensor is combined with
            # column-shaped `old_log_prob` and un-reshaped advantages --
            # confirm the network output and the resulting shapes.
            pi_log_prob = nd.pick(logits, action_col, 1)
            ratio = nd.exp(pi_log_prob - old_log_prob)
            clipped_ratio = nd.clip(ratio, 1.0 - cliprange_now,
                                    1.0 + cliprange_now)
            surrogate = nd.minimum(ratio * adv_nd, clipped_ratio * adv_nd)
            actor_loss = -nd.mean(surrogate)

            loss = vf_loss + actor_loss

        # Compute gradients and apply the update.
        loss.backward()
        self.trainer.step(obs_nd.shape[0])

        return actor_loss.asscalar(), vf_loss.asscalar()
    def learn(self):
        """Apply one policy-gradient update from the stored trajectory.

        Uses ``self.states``, ``self.actions`` and the output of
        ``self.discount_and_normalized_rewards()``. The loss is the negative
        log of the network output picked at each taken action, weighted by
        the corresponding reward value. After the optimizer step the three
        trajectory buffers are reset to empty lists.
        """
        ctx = self.ctx
        reward_nd = nd.array(self.discount_and_normalized_rewards(), ctx=ctx)
        state_nd = nd.array(self.states, ctx=ctx)

        with autograd.record():
            action_probs = self.network(state_nd)
            action_nd = nd.array(self.actions, ctx=ctx)
            chosen_prob = nd.pick(action_probs, action_nd)
            loss = -chosen_prob.log() * reward_nd

        loss.backward()
        self.optimizer.step(batch_size=len(self.states))

        # Start the next trajectory with empty buffers.
        self.states, self.actions, self.rewards = [], [], []
示例#27
0
    def choose_batch_action(self, phi_list):
        """Return the greedy action and its Q-value for each state in a batch.

        Args:
            phi_list: batch of states convertible by ``nd.array``; it must
                have at least two trailing axes, which are kept as-is.

        Returns:
            tuple: ``(actions, max_q_list)`` -- two Python lists holding, per
            batch entry, the index of the largest network output along
            axis 1 and the value of that output.
        """
        batch_input = nd.array(phi_list, ctx=self.ctx)
        batch_shape = batch_input.shape

        # Keep the batch axis and the last two axes; fold everything in
        # between into a single axis. The batch is already an NDArray on
        # self.ctx, so it is reshaped directly instead of being copied again.
        state = batch_input.reshape(
            (batch_shape[0], -1, batch_shape[-2], batch_shape[-1]))

        out = self.play_net(state)
        actions = nd.argmax(out, axis=1).astype('int')

        max_q_list = nd.pick(out, actions, 1).asnumpy().tolist()
        return actions.asnumpy().tolist(), max_q_list
示例#28
0
    def forward(self, program, parameters, index):
        """Encode a program/parameter sequence and decode a shape from one step.

        Args:
            program: batch-major 3-D tensor whose last axis has size
                ``self.vocab_size + 1``.
            parameters: batch-major 3-D tensor whose last axis has size
                ``self.max_param``.
            index: per-sample step index, shape (bsz,), selecting which
                LSTM output step is used.

        Returns:
            The output of ``self.decoder`` applied to the selected step's
            features reshaped to ``(bsz, self.program_vector_size, 1, 1, 1)``.
        """
        # Switch to time-major layout: (steps, bsz, features).
        program = program.transpose((1, 0, 2))
        parameters = parameters.transpose((1, 0, 2))
        bsz = program.shape[1]
        state = self.init_hidden(bsz, self.ctx)

        # Program linear transform, applied to every (step, sample) row.
        dim1 = program.shape
        program = program.reshape(-1, self.vocab_size + 1)
        x1 = nd.relu(self.pgm_embed(program))
        x1 = x1.reshape(dim1[0], dim1[1], -1)

        # Parameter linear transform, applied the same way.
        dim2 = parameters.shape
        parameters = parameters.reshape(-1, self.max_param)
        x2 = nd.relu(self.param_embed(parameters))
        x2 = x2.reshape(dim2[0], dim2[1], -1)

        # LSTM to aggregate programs and parameters.
        x = nd.concat(x1, x2, dim=2)
        out, _ = self.lstm(x, state)

        # Select the desired step for every sample: the step index is
        # repeated across the feature axis and used to pick along axis 0.
        index = index.expand_dims(axis=1).broadcast_to(
            (bsz, out.shape[2])).expand_dims(axis=0)
        # Reshape explicitly to (bsz, features). An axis-less squeeze()
        # would also remove the batch axis when bsz == 1.
        pgm_param_feat = nd.pick(out, index, 0).reshape((bsz, -1))
        pgm_param_feat = nd.relu(self.pgm_param_feat(pgm_param_feat))

        pgm_param_feat = pgm_param_feat.reshape(bsz, self.program_vector_size,
                                                1, 1, 1)
        shape = self.decoder(pgm_param_feat)

        return shape
示例#29
0
    def forward(self, cls_pred, box_pred, cls_target, box_target):
        """Compute classification and box losses over the whole batch.

        Each argument may be a single array or a list of arrays (one per
        device); all four are normalised to lists with ``_as_list``.

        Returns:
            tuple: ``(sum_losses, cls_losses, box_losses)`` -- three lists
            with one entry per input slice. Every loss is divided by the
            number of positive targets (``cls_target > 0``) counted over
            all slices, floored at 1.
        """
        # Results from all devices are needed at this point.
        cls_pred, box_pred, cls_target, box_target = [
            _as_list(x) for x in (cls_pred, box_pred, cls_target, box_target)]
        per_slice = list(zip(cls_pred, box_pred, cls_target, box_target))

        # Cross-device reduction: number of positive samples in the batch.
        pos_counts = [(ct > 0).sum() for _, _, ct, _ in per_slice]
        num_pos_all = sum([count.asscalar() for count in pos_counts])

        if num_pos_all < 1 and self._min_hard_negatives < 1:
            # No positives and no hard negatives requested: return dummy
            # zero losses that are still connected to the predictions.
            cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
            box_losses = [nd.sum(bp * 0) for bp in box_pred]
            sum_losses = [nd.sum(cp * 0) + nd.sum(bp * 0)
                          for cp, bp in zip(cls_pred, box_pred)]
            return sum_losses, cls_losses, box_losses

        normalizer = max(1., num_pos_all)
        cls_losses, box_losses, sum_losses = [], [], []
        for cp, bp, ct, bt in per_slice:
            # Element-wise cross entropy.
            log_prob = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(log_prob, ct, axis=-1, keepdims=False)

            # Negative mining: rank the non-positive entries by loss
            # (positives are zeroed by the `pos - 1` factor) and keep the
            # hardest ones, up to a per-sample budget.
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            budget = nd.maximum(self._min_hard_negatives,
                                pos.sum(axis=1) * self._negative_mining_ratio)
            hard_negative = rank < budget.expand_dims(-1)

            # Mask out everything that is neither positive nor hard negative.
            selected = (pos + hard_negative) > 0
            cls_loss = nd.where(selected, cls_loss, nd.zeros_like(cls_loss))
            cls_losses.append(
                nd.sum(cls_loss, axis=0, exclude=True) / normalizer)

            # Piecewise box loss: linear above rho, quadratic below.
            bp = _reshape_like(nd, bp, bt)
            abs_err = nd.abs(bp - bt)
            box_loss = nd.where(abs_err > self._rho,
                                abs_err - 0.5 * self._rho,
                                (0.5 / self._rho) * nd.square(abs_err))
            # Box loss only applies to positive samples.
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(
                nd.sum(box_loss, axis=0, exclude=True) / normalizer)

            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

        return sum_losses, cls_losses, box_losses
示例#30
0
    def decode(self, x):
        """Greedily unroll the decoder for ``self.seq_length`` steps.

        Args:
            x: input for the first step; its leading axis is the batch.

        Returns:
            list: ``[pgms, params]``. ``pgms`` holds the per-step
            log-softmax program scores and ``params`` the per-step parameter
            scores of the most probable program, each concatenated along
            axis 1 (the step axis).
        """
        batch_size = x.shape[0]
        state = self.init_hidden(batch_size, self.ctx)
        pgm_steps = []
        param_steps = []
        last_pgm_score = None

        for step in range(self.seq_length):
            if step == 0:
                xt = x
            else:
                # Feed back the embedding of the previous most likely program.
                prev_prob = nd.exp(last_pgm_score)
                xt = self.pgm_embed(nd.argmax(prev_prob, axis=1))

            output, state = self.core(xt.expand_dims(axis=0), state)
            hidden = output.squeeze(0)

            # Program head.
            pgm_logits = self.logit2(nd.relu(self.logit1(hidden)))
            pgm_score = nd.log_softmax(pgm_logits, axis=1)

            # Parameter head, conditioned on the (detached) program
            # distribution.
            trans_prob = nd.softmax(pgm_logits, axis=1).detach()
            param_hidden = nd.relu(self.regress1(hidden))
            param_input = nd.concat(trans_prob, param_hidden, dim=1)
            param_score = self.regress2(param_input).reshape(
                batch_size, self.vocab_size + 1, self.max_param)

            # Keep only the parameter row of the most probable program.
            best_pgm = nd.argmax(trans_prob, axis=1)
            best_pgm = best_pgm.expand_dims(axis=1).expand_dims(axis=2)
            best_pgm = best_pgm.broadcast_to(
                shape=(batch_size, 1, self.max_param)).detach()
            param_score = nd.pick(param_score, best_pgm, 1)

            last_pgm_score = pgm_score
            # Give every step its own axis so steps can be concatenated.
            pgm_steps.append(pgm_score.expand_dims(axis=1))
            param_steps.append(param_score.expand_dims(axis=1))

        pgms = pgm_steps[0]
        params = param_steps[0]
        for next_pgm, next_param in zip(pgm_steps[1:], param_steps[1:]):
            pgms = nd.concat(pgms, next_pgm, dim=1)
            params = nd.concat(params, next_param, dim=1)
        return [pgms, params]
示例#31
0
def get_smoothed_loss(pred, label, num_classes, trg_pad, smooth_alpha=0.1):
    """Label-smoothed cross entropy averaged over non-padding targets.

    The target distribution puts ``1 - smooth_alpha`` on the gold class and
    spreads ``smooth_alpha`` evenly over the other ``num_classes - 1``
    classes, which gives
    ``(1 - alpha - g) * ce - g * sum(logprob)`` with
    ``g = alpha / (num_classes - 1)``.

    Args:
        pred: class scores; the class axis is the last one.
        label: gold class ids, one per row of ``pred``.
        num_classes: number of classes (must be greater than 1).
        trg_pad: label id marking padding; those positions contribute
            nothing to the loss.
        smooth_alpha: total probability mass moved away from the gold class.

    Returns:
        NDArray: the summed loss divided by the number of non-padding
        labels. If every label is padding the result is 0 rather than NaN.
    """
    # NOTE(review): this floors the raw scores at 1e-10 *before*
    # log_softmax, i.e. all negative scores are flattened -- kept as in the
    # original; confirm this is intended.
    pred = nd.maximum(pred, 1e-10)
    logprob = nd.log_softmax(pred)

    # cross entropy of the gold class
    ce = -nd.pick(logprob, label)

    pre_class_gain = smooth_alpha / (num_classes - 1)

    loss = (1 - smooth_alpha - pre_class_gain) * ce - nd.sum(
        pre_class_gain * logprob, axis=-1, keepdims=False)

    mask = label != trg_pad
    loss = loss * mask

    # Floor the denominator at 1 so an all-padding batch yields 0 instead
    # of 0/0 = NaN; for any batch with a real token this is a no-op.
    num_valid = nd.maximum(mask.sum(), 1)
    loss = nd.sum(loss) / num_valid

    return loss
示例#32
0
    def forward(self, word_inputs, tag_inputs, arc_targets=None, rel_targets=None):
        """Score arcs and relations for a batch and compute loss or decode.

        Parameters
        ----------
        word_inputs : array
            word indices of seq_len x batch_size. NOTE(review): used with
            numpy functions (``np.greater``, ``np.where``) below, so this is
            presumably a numpy array rather than an NDArray -- confirm.
        tag_inputs : array
            tag indices of seq_len x batch_size
        arc_targets : array, optional
            gold arc indices of seq_len x batch_size
        rel_targets : array, optional
            gold rel indices of seq_len x batch_size

        Returns
        -------
        tuple or list
            * when ``autograd.is_training()`` is true:
              ``(arc_accuracy, rel_accuracy, overall_accuracy, loss)``;
            * otherwise, when ``arc_targets`` is given:
              ``(arc_accuracy, rel_accuracy, overall_accuracy, outputs)``;
            * otherwise: ``outputs``.

            ``outputs`` is a list with one ``(arcs, rels)`` pair per sentence.
        """
        is_train = autograd.is_training()

        def flatten_numpy(ndarray):
            """Flatten nd-array to 1-d vector in Fortran (column-major) order

            Parameters
            ----------
            ndarray : numpy.ndarray
                input tensor

            Returns
            -------
            numpy.ndarray
                A 1-d vector

            """
            return np.reshape(ndarray, (-1,), 'F')

        batch_size = word_inputs.shape[1]
        seq_len = word_inputs.shape[0]
        # positions whose word id is greater than the ROOT id count as tokens
        mask = np.greater(word_inputs, self._vocab.ROOT).astype(np.float32)
        num_tokens = int(np.sum(mask))  # non padding, non root token number

        if is_train or arc_targets is not None:
            mask_1D = flatten_numpy(mask)
            mask_1D_tensor = nd.array(mask_1D)

        # word ids at or above words_in_train are replaced by UNK for the trainable embedding
        unked_words = np.where(word_inputs < self._vocab.words_in_train, word_inputs, self._vocab.UNK)
        word_embs = self.word_embs(nd.array(unked_words, dtype='int'))
        if self.pret_word_embs:
            # the pretrained embedding is looked up with the original (un-UNKed) ids
            word_embs = word_embs + self.pret_word_embs(nd.array(word_inputs))
        tag_embs = self.tag_embs(nd.array(tag_inputs))

        # Concatenate word and tag embeddings along the feature axis
        emb_inputs = nd.concat(word_embs, tag_embs, dim=2)  # seq_len x batch_size

        top_recur = biLSTM(self.f_lstm, self.b_lstm, emb_inputs, batch_size,
                           dropout_x=self.dropout_lstm_input if is_train else 0)
        top_recur = nd.Dropout(data=top_recur, axes=[0], p=self.dropout_mlp)

        W_dep, b_dep = self.mlp_dep_W.data(), self.mlp_dep_b.data()
        W_head, b_head = self.mlp_head_W.data(), self.mlp_head_b.data()
        dep, head = leaky_relu(nd.dot(top_recur, W_dep.T) + b_dep), leaky_relu(nd.dot(top_recur, W_head.T) + b_head)
        dep, head = nd.Dropout(data=dep, axes=[0], p=self.dropout_mlp), nd.Dropout(data=head, axes=[0],
                                                                                       p=self.dropout_mlp)
        # move the feature axis to the front so it can be split by slicing
        dep, head = nd.transpose(dep, axes=[2, 0, 1]), nd.transpose(head, axes=[2, 0, 1])
        # the first mlp_arc_size features feed the arc scorer, the rest the relation scorer
        dep_arc, dep_rel = dep[:self.mlp_arc_size], dep[self.mlp_arc_size:]
        head_arc, head_rel = head[:self.mlp_arc_size], head[self.mlp_arc_size:]

        W_arc = self.arc_W.data()
        arc_logits = bilinear(dep_arc, W_arc, head_arc, self.mlp_arc_size, seq_len, batch_size, num_outputs=1,
                              bias_x=True, bias_y=False)
        # (#head x #dep) x batch_size

        flat_arc_logits = reshape_fortran(arc_logits, (seq_len, seq_len * batch_size))
        # (#head ) x (#dep x batch_size)

        arc_preds = arc_logits.argmax(0)
        # seq_len x batch_size

        if is_train or arc_targets is not None:
            # arc accuracy and loss are computed over masked (real) tokens only
            correct = np.equal(arc_preds.asnumpy(), arc_targets)
            arc_correct = correct.astype(np.float32) * mask
            arc_accuracy = np.sum(arc_correct) / num_tokens
            targets_1D = flatten_numpy(arc_targets)
            losses = self.softmax_loss(flat_arc_logits, nd.array(targets_1D))
            arc_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

        if not is_train:
            arc_probs = np.transpose(
                np.reshape(nd.softmax(flat_arc_logits, axis=0).asnumpy(), (seq_len, seq_len, batch_size), 'F'))
        # #batch_size x #dep x #head

        W_rel = self.rel_W.data()
        rel_logits = bilinear(dep_rel, W_rel, head_rel, self.mlp_rel_size, seq_len, batch_size,
                              num_outputs=self._vocab.rel_size, bias_x=True, bias_y=True)
        # (#head x rel_size x #dep) x batch_size

        flat_rel_logits = reshape_fortran(rel_logits, (seq_len, self._vocab.rel_size, seq_len * batch_size))
        # (#head x rel_size) x (#dep x batch_size)

        # heads used to select relation logits: gold heads when training, predicted heads otherwise
        _target_vec = nd.array(targets_1D if is_train else flatten_numpy(arc_preds.asnumpy())).reshape(
            seq_len * batch_size, 1)
        # repeat each head index across all relation classes so it can be used as a pick index
        _target_mat = _target_vec * nd.ones((1, self._vocab.rel_size))

        partial_rel_logits = nd.pick(flat_rel_logits, _target_mat.T, axis=0)
        # (rel_size) x (#dep x batch_size)

        if is_train or arc_targets is not None:
            rel_preds = partial_rel_logits.argmax(0)
            # targets_1D is rebound here: from now on it holds the gold relations, not the gold arcs
            targets_1D = flatten_numpy(rel_targets)
            rel_correct = np.equal(rel_preds.asnumpy(), targets_1D).astype(np.float32) * mask_1D
            rel_accuracy = np.sum(rel_correct) / num_tokens
            losses = self.softmax_loss(partial_rel_logits, nd.array(targets_1D))
            rel_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

        if not is_train:
            rel_probs = np.transpose(np.reshape(nd.softmax(flat_rel_logits.transpose([1, 0, 2]), axis=0).asnumpy(),
                                                (self._vocab.rel_size, seq_len, seq_len, batch_size), 'F'))
        # batch_size x #dep x #head x #nclasses

        if is_train or arc_targets is not None:
            loss = arc_loss + rel_loss
            # a token counts as fully correct only if both its arc and its relation are correct
            correct = rel_correct * flatten_numpy(arc_correct)
            overall_accuracy = np.sum(correct) / num_tokens

        if is_train:
            return arc_accuracy, rel_accuracy, overall_accuracy, loss

        outputs = []

        for msk, arc_prob, rel_prob in zip(np.transpose(mask), arc_probs, rel_probs):
            # parse sentences one by one
            # position 0 is switched on in the mask so it is included in the sentence length
            msk[0] = 1.
            sent_len = int(np.sum(msk))
            arc_pred = arc_argmax(arc_prob, sent_len, msk)
            # keep, for every dependent, the relation distribution of its predicted head
            rel_prob = rel_prob[np.arange(len(arc_pred)), arc_pred]
            rel_pred = rel_argmax(rel_prob, sent_len)
            # position 0 is dropped from the returned predictions
            outputs.append((arc_pred[1:sent_len], rel_pred[1:sent_len]))

        if arc_targets is not None:
            return arc_accuracy, rel_accuracy, overall_accuracy, outputs
        return outputs