Example #1
    def layerwise_relevance_zclip(self, out, use_bias=False, **kwargs):
        if self._in is None:
            raise RuntimeError('Block has not yet executed forward_logged!')
        R = out
        a = self._in[0]
        z = self._out
        weight = self.weight.data(ctx=a.context)
        wplus = nd.maximum(0., weight)
        wminus = nd.minimum(0., weight)

        bplus = None
        bminus = None
        if use_bias:
            bias = self.bias.data(ctx=a.context)
            bplus = nd.maximum(0., bias)
            bminus = nd.minimum(0., bias)

        alpha = z > 0.
        beta = z < 0.

        a.attach_grad()
        with autograd.record():
            zplus = self._forward(data=a, weight=wplus, bias=bplus)
        cplus, = autograd.grad(zplus,
                               a,
                               head_grads=alpha * R / (zplus + (zplus == 0.)))

        with autograd.record():
            zminus = self._forward(data=a, weight=wminus, bias=bminus)
        cminus, = autograd.grad(zminus,
                                a,
                                head_grads=beta * R / (zminus +
                                                       (zminus == 0.)))

        return a * (cplus - cminus)
Example #2
    def layerwise_relevance_zb(self,
                               out,
                               lo=-1,
                               hi=1,
                               use_bias=False,
                               **kwargs):
        if self._in is None:
            raise RuntimeError('Block has not yet executed forward_logged!')
        R = out
        a = self._in[0]
        weight = self.weight.data(ctx=a.context)
        wplus = nd.maximum(0., weight)
        wminus = nd.minimum(0., weight)

        bias = None
        bplus = None
        bminus = None
        if use_bias:
            bias = self.bias.data(ctx=a.context)
            bplus = nd.maximum(0., bias)
            bminus = nd.minimum(0., bias)

        upper = nd.ones_like(a) * hi
        lower = nd.ones_like(a) * lo
        a.attach_grad()
        upper.attach_grad()
        lower.attach_grad()
        with autograd.record():
            zlh = (self._forward(a, weight, bias) -
                   self._forward(lower, wplus, bplus) -
                   self._forward(upper, wminus, bminus))
        zlh.backward(out_grad=R / (zlh + (zlh == 0.)))
        return a * a.grad + upper * upper.grad + lower * lower.grad
Example #3
def bgr2hsi(x):
    """ x:n,c(b,g,r),w,h
        return n,c(h,s,i),w,h
    """
    sum_RGB = nd.sum(x.astype('float32'), axis=1)
    R = x[:, 0, :, :].astype('float32')
    G = x[:, 1, :, :].astype('float32')
    B = x[:, 2, :, :].astype('float32')

    r = (R + eps) / (sum_RGB + 3 * eps)
    g = (G + eps) / (sum_RGB + 3 * eps)
    b = (B + eps) / (sum_RGB + 3 * eps)

    cossita = (2 * r - g - b) / (2 * ((r - g)**2 + (r - b) *
                                      (g - b))**(1.0 / 2) + eps)
    cossita_clip = nd.clip(cossita, -1.0, 1.0)

    sita = nd.arccos(cossita_clip)

    h = (nd.where(g >= b, sita, 2 * math.pi - sita)).expand_dims(axis=1)

    s = (1 - 3 * nd.minimum(nd.minimum(r, g), b)).expand_dims(axis=1)
    s = nd.clip(s, 0., 1.)

    i = ((R + G + B) / 3).expand_dims(axis=1)

    return nd.concat(h, s, i, dim=1)
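The function above relies on a module-level `eps` constant and `math`/`nd` imports that are not shown. A minimal usage sketch under that assumption (the constant and imports are supplied here only for illustration):

import math
from mxnet import nd

eps = 1e-8  # assumed module-level constant used by bgr2hsi above

# two fake 4x4 three-channel images with values in [0, 1]
x = nd.random.uniform(0, 1, shape=(2, 3, 4, 4))
hsi = bgr2hsi(x)
print(hsi.shape)  # (2, 3, 4, 4): channels are now (h, s, i)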
Example #4
def get_iou(predict, target, mode=1):
    '''
    @input:
        predict: m*n*4 ndarray
        target : shape (5,)
        mode   : 1: target is cltrb
                 2: target is cyxhw
    @return
        (m*n*1) ndarray
    '''
    l, t, r, b = predict.split(num_outputs=4, axis=-1)
    if mode == 1:
        l2 = target[1]
        t2 = target[2]
        r2 = target[3]
        b2 = target[4]
    elif mode == 2:
        l2 = target[2] - target[4]/2
        t2 = target[1] - target[3]/2
        r2 = target[2] + target[4]/2
        b2 = target[1] + target[3]/2
    else:
        raise ValueError('mode should be int 1 or 2')

    i_left = nd.maximum(l2, l)
    i_top = nd.maximum(t2, t)
    i_right = nd.minimum(r2, r)
    i_bottom = nd.minimum(b2, b)
    iw = nd.maximum(i_right - i_left, 0.)
    ih = nd.maximum(i_bottom - i_top, 0.)
    inters = iw * ih
    predict_area = (r-l)*(b-t)
    target_area = target[3] * target[4]
    ious = inters/(predict_area + target_area - inters) 
    return ious # 1344x3x1
Example #5
    def layerwise_relevance_zclip(self, out, use_bias=False, **kwargs):
        if self._in is None:
            raise RuntimeError('Block has not yet executed forward_logged!')
        R = out
        a = self._in[0]
        z = self._out
        weight = self.weight.data(ctx=a.context)
        wplus = nd.maximum(0., weight)
        wminus = nd.minimum(0., weight)

        bplus = None
        bminus = None
        if use_bias:
            bias = self.bias.data(ctx=a.context)
            bplus = nd.maximum(0., bias)
            bminus = nd.minimum(0., bias)

        alpha = z > 0.
        beta = z < 0.

        a.attach_grad()
        with autograd.record():
            zplus = self._forward(data=a, weight=wplus, bias=bplus)
        cplus, = autograd.grad(zplus, a, head_grads=alpha*R/(zplus + (zplus == 0.)))

        with autograd.record():
            zminus = self._forward(data=a, weight=wminus, bias=bminus)
        cminus, = autograd.grad(zminus, a, head_grads=beta*R/(zminus + (zminus == 0.)))

        return a*(cplus - cminus)
Example #6
def get_dis(data, mean, dis_method='iou'):
    if dis_method == 'iou':
        # data = bs*(w, h) ndarray
        # mean = 1*(w, h) ndarray
        # |--------|-----|
        # | inters |     |
        # |--------|     |  h
        # |              |
        # |--------------|
        #        w
        data_w, data_h = data.split(num_outputs=2, axis=-1)
        mean_w, mean_h = mean

        inters_w = nd.minimum(data_w, mean_w)
        inters_h = nd.minimum(data_h, mean_h)
        inters = inters_w * inters_h

        data_area = data_w * data_h
        mean_area = mean_w * mean_h
        ious = inters / (data_area + mean_area - inters)
        distance = 1 / ious

    elif dis_method == 'L2':
        vec = data - mean
        distance = nd.norm(vec, ord=2, axis=-1).reshape((-1, 1))

    return distance
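A minimal usage sketch (assumed, not from the source): the 'iou' branch unpacks `mean` into a (w, h) pair, so a plain tuple is passed there, while the 'L2' branch subtracts `mean` from `data`, so a 1x2 NDArray is used.

from mxnet import nd

# five candidate (w, h) box sizes, e.g. for anchor clustering
data = nd.array([[10, 20], [30, 30], [8, 16], [50, 40], [25, 25]])

d_iou = get_dis(data, (20.0, 20.0), dis_method='iou')            # (5, 1): 1 / IoU with the centre
d_l2 = get_dis(data, nd.array([[20.0, 20.0]]), dis_method='L2')  # (5, 1): Euclidean distance
print(d_iou.shape, d_l2.shape)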
Example #7
    def layerwise_relevance_zb(self, out, lo=-1, hi=1, use_bias=False, **kwargs):
        if self._in is None:
            raise RuntimeError('Block has not yet executed forward_logged!')
        R = out
        a = self._in[0]
        weight = self.weight.data(ctx=a.context)
        wplus = nd.maximum(0., weight)
        wminus = nd.minimum(0., weight)

        bias = None
        bplus = None
        bminus = None
        if use_bias:
            bias = self.bias.data(ctx=a.context)
            bplus = nd.maximum(0., bias)
            bminus = nd.minimum(0., bias)

        upper = nd.ones_like(a)*hi
        lower = nd.ones_like(a)*lo
        a.attach_grad()
        upper.attach_grad()
        lower.attach_grad()
        with autograd.record():
            zlh = ( self._forward(a, weight, bias)
                  - self._forward(lower, wplus, bplus)
                  - self._forward(upper, wminus, bminus)
                  )
        zlh.backward(out_grad=R/(zlh + (zlh == 0.)))
        return a*a.grad + upper*upper.grad + lower*lower.grad
Example #8
def test_minimum():
    x = mx.nd.ones(LARGE_X) * 3
    y = mx.nd.ones(LARGE_X) * 2
    z = nd.minimum(x, y)
    assert z[0] == 2
    assert z[-1] == 2
    z = nd.minimum(x, 5)
    assert z[0] == 3
    assert z[-1] == 3
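The test above depends on a `LARGE_X` constant from its test harness. A self-contained variant with a small stand-in value (an assumption made only for illustration):

import mxnet as mx
from mxnet import nd

LARGE_X = 100  # stand-in; the original test uses a very large length

x = mx.nd.ones(LARGE_X) * 3
y = mx.nd.ones(LARGE_X) * 2
z = nd.minimum(x, y)   # elementwise minimum of two arrays
assert z[0] == 2 and z[-1] == 2
z = nd.minimum(x, 5)   # minimum against a scalar
assert z[0] == 3 and z[-1] == 3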
Example #9
    def _compute_yolo_iou(self, F, boxes1, boxes2):
        '''
        IoU of corresponding anchors
        '''

        # to corner representation
        x11 = boxes1[:, :, :, :, 0] - boxes1[:, :, :, :, 2] / 2.0
        y11 = boxes1[:, :, :, :, 1] - boxes1[:, :, :, :, 3] / 2.0
        x12 = boxes1[:, :, :, :, 0] + boxes1[:, :, :, :, 2] / 2.0
        y12 = boxes1[:, :, :, :, 1] + boxes1[:, :, :, :, 3] / 2.0
        boxes1_new = nd.stack(x11, y11, x12, y12, axis=-1)
        x21 = boxes2[:, :, :, :, 0] - boxes2[:, :, :, :, 2] / 2.0
        y21 = boxes2[:, :, :, :, 1] - boxes2[:, :, :, :, 3] / 2.0
        x22 = boxes2[:, :, :, :, 0] + boxes2[:, :, :, :, 2] / 2.0
        y22 = boxes2[:, :, :, :, 1] + boxes2[:, :, :, :, 3] / 2.0
        boxes2_new = nd.stack(x21, y21, x22, y22, axis=-1)

        # calculating 2 border points
        upperleft = nd.maximum(boxes1_new[:, :, :, :, :2],
                               boxes2_new[:, :, :, :, :2])
        lowerright = nd.minimum(boxes1_new[:, :, :, :, 2:],
                                boxes2_new[:, :, :, :, 2:])

        intersection_dims = nd.maximum(0.0, lowerright - upperleft)
        intersection_area = intersection_dims[:, :, :, :,
                                              0] * intersection_dims[:, :, :, :,
                                                                     1]

        # areas come from the original (x, y, w, h) boxes: w * h
        area1 = boxes1[:, :, :, :, 2] * boxes1[:, :, :, :, 3]
        area2 = boxes2[:, :, :, :, 2] * boxes2[:, :, :, :, 3]

        union_area = nd.maximum(1e-8, area1 + area2 - intersection_area)

        return nd.clip(intersection_area / union_area, a_min=0.0, a_max=1.0)
Example #10
    def old_update(self, b_s, b_a, b_r, b_logpac):
        b_s = nd.array(b_s, ctx=self.args.ctx).reshape(
            (-1, self.observation_dim))
        b_a = nd.array(b_a, ctx=self.args.ctx).reshape((-1, self.action_dim))
        b_r = nd.array(b_r, ctx=self.args.ctx).reshape((-1, 1))
        b_oldpi_log_prob = nd.array(b_logpac, ctx=self.args.ctx).reshape(
            (-1, self.action_dim))

        with autograd.record():
            # Value loss
            v_pred, mu, sigma = self.net(b_s)
            advantage = b_r - v_pred
            vf_loss = nd.mean(nd.square(advantage))

            # Detach from the computation graph
            advantage = advantage.detach()

            # Action loss
            pi_log_prob = self.net.log_prob(b_a, mu, sigma)
            ratio = nd.exp(pi_log_prob - b_oldpi_log_prob)
            surr1 = ratio * advantage
            surr2 = nd.clip(ratio, 1.0 - self.args.clip_param,
                            1.0 + self.args.clip_param) * advantage
            actor_loss = -nd.mean(nd.minimum(surr1, surr2))
            entropy = self.net.entropy(sigma)

            # Total (maximize entropy to encourage exploration)
            loss = vf_loss * self.args.value_coefficient + actor_loss \
                    - entropy * self.args.entropy_coefficient

        loss.backward()
        self.trainer.step(b_s.shape[0])
Example #11
def sample(match, cls_pred, iou, ratio=3, min_sample=0, threshold=0.5, do=True):
    if do is False:
        ones = nd.ones_like(match)
        sample = nd.where(match > -0.5, ones, ones*-1)
        return sample
    sample = nd.zeros_like(match)
    num_pos = nd.sum(match > -0.5, axis=-1)
    require_neg = ratio * num_pos
    neg_mask = nd.where(match < -0.5, nd.max(iou, axis=-1) < threshold, sample)
    max_neg = neg_mask.sum(axis=-1)
    num_neg = nd.minimum(max_neg, nd.maximum(require_neg, min_sample)).astype('int')
   
    neg_prob = cls_pred[:,:,0]
    max_value = nd.max(cls_pred, axis=-1, keepdims=True)
    score = max_value[:,:,0] - neg_prob + nd.log(
                                   nd.sum(
                                   nd.exp(cls_pred-max_value), axis=-1))

    score = nd.where(neg_mask, score, nd.zeros_like(score))
    argmax = nd.argsort(score, axis=-1, is_ascend=False)
    sample = nd.where(match > -0.5, nd.ones_like(sample), sample)
    
    for i, num in enumerate(num_neg):
        sample[i, argmax[i,:num.asscalar()]] = -1
    
    return sample
Example #12
def clip_grad(grads: Union[Generator[NDArray, NDArray, NDArray], List[NDArray],
                           Tuple[NDArray]],
              clip_method: GradientClippingMethod,
              clip_val: float,
              inplace=True) -> List[NDArray]:
    """
    Clip gradient values inplace
    :param grads: gradients to be clipped
    :param clip_method: clipping method
    :param clip_val: clipping value. Interpreted differently depending on clipping method.
    :param inplace: modify grads if True, otherwise create NDArrays
    :return: clipped gradients
    """
    output = list(grads) if inplace else list(nd.empty(g.shape) for g in grads)
    if clip_method == GradientClippingMethod.ClipByGlobalNorm:
        norm_unclipped_grads = global_norm(grads)
        scale = clip_val / (norm_unclipped_grads.asscalar() + 1e-8
                            )  # todo: use branching operators?
        if scale < 1.0:
            for g, o in zip(grads, output):
                nd.broadcast_mul(g, nd.array([scale]), out=o)
    elif clip_method == GradientClippingMethod.ClipByValue:
        for g, o in zip(grads, output):
            g.clip(-clip_val, clip_val, out=o)
    elif clip_method == GradientClippingMethod.ClipByNorm:
        for g, o in zip(grads, output):
            nd.broadcast_mul(g,
                             nd.minimum(1.0, clip_val / (g.norm() + 1e-8)),
                             out=o)
    else:
        raise KeyError('Unsupported gradient clipping method')
    return output
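A minimal usage sketch (assumed, not from the source); `GradientClippingMethod` and the `global_norm` helper are expected to be defined alongside `clip_grad` in the same module:

from mxnet import nd

# hypothetical gradients for two parameters
grads = [nd.array([3.0, -4.0]), nd.array([0.5, 0.5])]

# clip each gradient to an L2 norm of at most 1.0, writing into new arrays
clipped = clip_grad(grads, GradientClippingMethod.ClipByNorm, clip_val=1.0, inplace=False)
print([g.asnumpy() for g in clipped])  # first gradient is rescaled to unit norm, second is unchanged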
Example #13
def clip(tensor, a_min=None, a_max=None, inplace=False):
    if a_min is not None and a_max is not None:
        if inplace:
            tensor[:] = np.maximum(np.minimum(tensor, a_max), a_min)
        else:
            tensor = np.maximum(np.minimum(tensor, a_max), a_min)
    elif a_min is not None:
        if inplace:
            tensor[:] = np.maximum(tensor, a_min)
        else:
            tensor = np.maximum(tensor, a_min)
    elif a_max is not None:
        if inplace:
            tensor[:] = np.minimum(tensor, a_max)
        else:
            tensor = np.minimum(tensor, a_max)
    return tensor
Example #14
def clip(tensor, a_min=None, a_max=None, inplace=False):
    if a_min is not None and a_max is not None:
        if inplace:
            tensor[:] = nd.maximum(nd.minimum(tensor, a_max), a_min)
        else:
            tensor = nd.maximum(nd.minimum(tensor, a_max), a_min)
    elif a_min is not None:
        if inplace:
            tensor[:] = nd.maximum(tensor, a_min)
        else:
            tensor = nd.maximum(tensor, a_min)
    elif a_max is not None:
        if inplace:
            tensor[:] = nd.minimum(tensor, a_max)
        else:
            tensor = nd.minimum(tensor, a_max)
    return tensor
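A minimal usage sketch of the corrected helper (not from the source):

from mxnet import nd

t = nd.array([-2.0, 0.5, 3.0])
print(clip(t, a_min=-1.0, a_max=1.0).asnumpy())   # [-1.   0.5  1. ]

t2 = nd.array([-2.0, 0.5, 3.0])
clip(t2, a_max=1.0, inplace=True)                 # upper bound only, modifies t2
print(t2.asnumpy())                               # [-2.   0.5  1. ]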
Example #15
def get_iou(predict, target, mode=1):
    '''
    Parameters
    ----------
    predict: mxnet.ndarray
      channels are {???}*4
    target: mxnet.ndarray
      target.shape = (5)
    mode: [1,2]
      1: target format is cltrb
      2: target format is cyxhw

    Returns
    ----------
    ious: mxnet.ndarray
      ious between predict and target, dimension is {???}x1
    '''
    l, t, r, b = predict.split(num_outputs=4, axis=-1)
    if mode == 1:
        l2 = target[1]
        t2 = target[2]
        r2 = target[3]
        b2 = target[4]
    elif mode == 2:
        l2 = target[2] - target[4] / 2
        t2 = target[1] - target[3] / 2
        r2 = target[2] + target[4] / 2
        b2 = target[1] + target[3] / 2
    else:
        raise ValueError('mode should be int 1 or 2')

    i_left = nd.maximum(l2, l)
    i_top = nd.maximum(t2, t)
    i_right = nd.minimum(r2, r)
    i_bottom = nd.minimum(b2, b)
    iw = nd.maximum(i_right - i_left, 0.)
    ih = nd.maximum(i_bottom - i_top, 0.)
    inters = iw * ih
    predict_area = (r - l) * (b - t)
    target_area = target[3] * target[4]
    ious = inters / (predict_area + target_area - inters)
    return ious  # 1344x3x1
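A minimal usage sketch (assumed shapes, not from the source): three predicted boxes in (l, t, r, b) layout against one mode-1 target of the form (class, l, t, r, b).

from mxnet import nd

predict = nd.array([[[10, 10, 50, 50],
                     [20, 20, 40, 60],
                     [0, 0, 5, 5]]])          # shape (1, 3, 4)
target = nd.array([0, 15, 15, 45, 55])        # (class, l, t, r, b)

ious = get_iou(predict, target, mode=1)
print(ious.shape)                             # (1, 3, 1)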
Example #16
def fltrust(epoch, gradients, net, lr, f, byz):

    param_list = [
        nd.concat(*[xx.reshape((-1, 1)) for xx in x], dim=0) for x in gradients
    ]
    # let the malicious clients (first f clients) perform the byzantine attack
    param_list = byz(epoch, param_list, net, lr, f)
    n = len(param_list
            ) - 1  # -1 so as to not include the gradient of the server model

    # use the last gradient (server update) as the trusted source
    #print(nd.array(param_list[-1]).shape)
    baseline = nd.array(param_list[-1]).squeeze()
    #print(baseline.shape)
    cos_sim = []
    new_param_list = []

    #print(param_list[0].shape)
    print(nd.norm(baseline))
    # compute cos similarity
    for each_param_list in param_list:
        each_param_array = nd.array(each_param_list).squeeze()
        cos_sim.append(
            nd.dot(baseline, each_param_array) / (nd.norm(baseline) + 1e-9) /
            (nd.norm(each_param_array) + 1e-9))

    cos_sim = nd.stack(*cos_sim)[:-1]
    #print(cos_sim)
    cos_sim = nd.maximum(cos_sim, 0)  # relu
    cos_sim = nd.minimum(cos_sim, 1)
    #print(cos_sim)
    normalized_weights = cos_sim / (nd.sum(cos_sim) + 1e-9
                                    )  # weighted trust score
    #print(normalized_weights)

    # normalize the magnitudes and weight by the trust score
    for i in range(n):
        new_param_list.append(param_list[i] * normalized_weights[i] /
                              (nd.norm(param_list[i]) + 1e-9) *
                              nd.norm(baseline))
        #print(normalized_weights[i] / (nd.norm(param_list[i]) + 1e-9) * nd.norm(baseline))
    #print("normalized weights: " + str(normalized_weights[i]))
    #print("baseline: " + str(nd.norm(baseline)))

    # update the global model
    global_update = nd.sum(nd.concat(*new_param_list, dim=1), axis=-1)
    idx = 0
    for j, (param) in enumerate(net.collect_params().values()):
        if param.grad_req == 'null':
            continue
        #print(global_update[idx:(idx+param.data().size)])
        param.set_data(param.data() - lr * global_update[idx:(
            idx + param.data().size)].reshape(param.data().shape))
        idx += param.data().size
Example #17
def bbox_iou(lhs, rhs, x1y1x2y2=True):
    if x1y1x2y2:
        b1_xmin, b1_ymin, b1_xmax, b1_ymax = nd.split(lhs,
                                                      axis=-1,
                                                      num_outputs=4)
        b2_xmin, b2_ymin, b2_xmax, b2_ymax = nd.split(rhs,
                                                      axis=-1,
                                                      num_outputs=4)
    else:
        b1_x, b1_y, b1_w, b1_h = nd.split(lhs, axis=-1, num_outputs=4)
        b2_x, b2_y, b2_w, b2_h = nd.split(rhs, axis=-1, num_outputs=4)

        b1_xmin, b1_xmax = b1_x - b1_w / 2., b1_x + b1_w / 2.
        b1_ymin, b1_ymax = b1_y - b1_h / 2., b1_y + b1_h / 2.
        b2_xmin, b2_xmax = b2_x - b2_w / 2., b2_x + b2_w / 2.
        b2_ymin, b2_ymax = b2_y - b2_h / 2., b2_y + b2_h / 2.

    # Intersection area
    MAX = 1e5
    inter_w = nd.clip(
        nd.minimum(b1_xmax, b2_xmax) - nd.maximum(b1_xmin, b2_xmin), 0, MAX)
    inter_h = nd.clip(
        nd.minimum(b1_ymax, b2_ymax) - nd.maximum(b1_ymin, b2_ymin), 0, MAX)
    # inter_w = F.where(inter_w < 0., F.zeros_like(inter_w), inter_w)
    # inter_h = F.where(inter_h < 0., F.zeros_like(inter_h), inter_h)
    inter = inter_w * inter_h

    # Union Area
    w1, h1 = b1_xmax - b1_xmin, b1_ymax - b1_ymin
    w2, h2 = b2_xmax - b2_xmin, b2_ymax - b2_ymin
    # w1 = F.where(w1 < 0., F.zeros_like(w1), w1)
    # h1 = F.where(h1 < 0., F.zeros_like(h1), h1)
    # w2 = F.where(w2 < 0., F.zeros_like(w2), w2)
    # h2 = F.where(h2 < 0., F.zeros_like(h2), h2)
    union = (w1 * h1 + 1e-16) + w2 * h2 - inter

    iou = inter / union  # iou
    return iou
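A minimal usage sketch (not from the source) comparing box pairs row by row in both supported layouts:

from mxnet import nd

# corner format (xmin, ymin, xmax, ymax)
lhs = nd.array([[0., 0., 10., 10.],
                [5., 5., 15., 15.]])
rhs = nd.array([[0., 0., 10., 10.],
                [10., 10., 20., 20.]])
print(bbox_iou(lhs, rhs).asnumpy())                       # approx. [[1.0], [0.143]]

# the same box given twice in centre format (cx, cy, w, h)
lhs_c = nd.array([[5., 5., 10., 10.]])
print(bbox_iou(lhs_c, lhs_c, x1y1x2y2=False).asnumpy())   # approx. [[1.0]]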
Example #18
    def update(self, obs, returns, masks, actions, values, logpacs, lrnow,
               cliprange_now):
        advantages = returns - values
        advantages = (advantages - advantages.mean()) / (advantages.std() +
                                                         1e-8)
        advantages = nd.array(advantages,
                              ctx=self.args.ctx)  # .reshape((-1, 1))

        obs = np.transpose(obs, (0, 3, 1, 2))
        obs = nd.array(obs, ctx=self.args.ctx)
        actions = nd.array(actions, ctx=self.args.ctx).reshape((-1, 1))
        values = nd.array(values, ctx=self.args.ctx).reshape((-1, 1))
        returns = nd.array(returns, ctx=self.args.ctx).reshape((-1, 1))
        oldpi_log_prob = nd.array(logpacs, ctx=self.args.ctx).reshape((-1, 1))

        # self.trainer.set_learning_rate(lrnow)

        # Auto grad
        with autograd.record():
            # Value loss
            vpred, logits = self.net(obs)
            vpred_clipped = values + nd.clip(vpred - values, -cliprange_now,
                                             cliprange_now)
            vf_loss1 = nd.square(vpred - returns)
            vf_loss2 = nd.square(vpred_clipped - returns)
            vf_loss = nd.mean(nd.maximum(vf_loss1, vf_loss2))

            # Action loss
            # pi_log_prob = self.net.log_prob(logits, actions)
            pi_log_prob = nd.pick(logits, actions, 1)
            ratio = nd.exp(pi_log_prob - oldpi_log_prob)
            surr1 = ratio * advantages
            surr2 = nd.clip(ratio, 1.0 - cliprange_now,
                            1.0 + cliprange_now) * advantages
            actor_loss = -nd.mean(nd.minimum(surr1, surr2))

            # Entropy term
            # entropy = self.net.entropy(logits)

            # Total loss
            # loss = vf_loss * self.args.value_coefficient + actor_loss
            # - entropy * self.args.entropy_coefficient
            loss = vf_loss + actor_loss

        # Compute gradients and updates
        loss.backward()
        self.trainer.step(obs.shape[0])

        return actor_loss.asscalar(), vf_loss.asscalar()  #, entropy.asscalar()
Example #19
    def update(self):
        self.total_train_steps += 1
        state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory_buffer.sample(self.batch_size)

        # --------------optimize the critic network--------------------
        with autograd.record():
            # choose next action according to target policy network
            next_action_batch = self.target_actor_network(next_state_batch)
            noise = nd.normal(loc=0, scale=self.policy_noise, shape=next_action_batch.shape, ctx=self.ctx)
            # with noise clip
            noise = nd.clip(noise, a_min=-self.noise_clip, a_max=self.noise_clip)
            next_action_batch = next_action_batch + noise
            clipped_action = self.action_clip(next_action_batch)

            # get target q value
            target_q_value1 = self.target_critic_network1(next_state_batch, clipped_action)
            target_q_value2 = self.target_critic_network2(next_state_batch, clipped_action)
            target_q_value = nd.minimum(target_q_value1, target_q_value2).squeeze()
            target_q_value = reward_batch + (1.0 - done_batch) * (self.gamma * target_q_value)

            # get current q value
            current_q_value1 = self.main_critic_network1(state_batch, action_batch)
            current_q_value2 = self.main_critic_network2(state_batch, action_batch)
            loss = gloss.L2Loss()

            value_loss1 = loss(current_q_value1, target_q_value.detach())
            value_loss2 = loss(current_q_value2, target_q_value.detach())

        self.main_critic_network1.collect_params().zero_grad()
        value_loss1.backward()
        self.critic1_optimizer.step(self.batch_size)

        self.main_critic_network2.collect_params().zero_grad()
        value_loss2.backward()
        self.critic2_optimizer.step(self.batch_size)

        # ---------------optimize the actor network-------------------------
        if self.total_train_steps % self.policy_update == 0:
            with autograd.record():
                pred_action_batch = self.main_actor_network(state_batch)
                actor_loss = -nd.mean(self.main_critic_network1(state_batch, pred_action_batch))

            self.main_actor_network.collect_params().zero_grad()
            actor_loss.backward()
            self.actor_optimizer.step(1)

            self.soft_update(self.target_actor_network, self.main_actor_network)
            self.soft_update(self.target_critic_network1, self.main_critic_network1)
            self.soft_update(self.target_critic_network2, self.main_critic_network2)
Example #20
    def forward(self, x=0):
        if (mx.autograd.is_training()):
            u = nd.random.uniform(0, 1)
            s = nd.log(u) - nd.log(1 - u) + self._qz_loga.data()
            if (self._temperature == 0):
                s = nd.sign(s)
            else:
                s = nd.sigmoid(s / self._temperature)

        else:
            s = nd.sigmoid(self._qz_loga.data())

        s = s * (self._limit_hi - self._limit_lo) + self._limit_lo

        return nd.minimum(1, nd.maximum(s, 0))
Example #21
            def _go_below(x):
                lower = nd.min(x, axis=0)
                lower = nd.minimum(lower, node._box._min_list.data())
                upper = nd.max(x, axis=0)
                upper = nd.maximum(upper, node._box._max_list.data())
                node._box._init_param("min_list", lower)
                node._box._init_param("max_list", upper)

                if (self._structure[node] is not None):
                    l_node = next(
                        key for key, value in self._structure[node].items()
                        if value == -1)
                    r_node = next(
                        key for key, value in self._structure[node].items()
                        if value == 1)
                    decision = node._decision.forward(x, crisp=True)
                    _shard(decision, x, _extend(l_node), _extend(r_node))
Example #22
    def update(self, obs, returns, masks, actions, values, logpacs):
        advantages = returns - values
        # advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)

        advantages = nd.array(advantages, ctx=self.args.ctx).reshape((-1, 1))
        obs = nd.array(obs, ctx=self.args.ctx).reshape(
            (-1, self.observation_dim))
        actions = nd.array(actions, ctx=self.args.ctx).reshape(
            (-1, self.action_dim))
        values = nd.array(values, ctx=self.args.ctx).reshape((-1, 1))
        returns = nd.array(returns, ctx=self.args.ctx).reshape((-1, 1))
        oldpi_log_prob = nd.array(logpacs, ctx=self.args.ctx).reshape(
            (-1, self.action_dim))

        # Learning rate scheduling
        # self.trainer.set_learning_rate(lr)

        # Auto grad
        with autograd.record():
            # Value loss
            vpred, mu, sigma = self.net(obs)
            vpred_clipped = values + nd.clip(
                vpred - values, -self.args.clip_param, self.args.clip_param)
            vf_loss1 = nd.square(vpred - returns)
            vf_loss2 = nd.square(vpred_clipped - returns)
            vf_loss = nd.mean(nd.maximum(vf_loss1, vf_loss2))

            # Action loss
            pi_log_prob = self.net.log_prob(actions, mu, sigma)
            ratio = nd.exp(pi_log_prob - oldpi_log_prob)
            surr1 = ratio * advantages
            surr2 = nd.clip(ratio, 1.0 - self.args.clip_param,
                            1.0 + self.args.clip_param) * advantages
            actor_loss = -nd.mean(nd.minimum(surr1, surr2))

            # Entropy term
            entropy = self.net.entropy(sigma)

            # Total loss
            loss = vf_loss * self.args.value_coefficient + actor_loss \
                        - entropy * self.args.entropy_coefficient

        # Compute gradients and updates
        loss.backward()
        self.trainer.step(obs.shape[0])
Example #23
def box_iou(b1, b2):
    '''Return iou tensor

    Parameters
    ----------
    b1: tensor, shape=(i1,...,iN, 4), xywh
    b2: tensor, shape=(j, 4), xywh

    Returns
    -------
    iou: tensor, shape=(i1,...,iN, j)

    '''

    # Expand dim to apply broadcasting.
    b1 = nd.expand_dims(b1, -2)
    b1_xy = b1[:, :, :2]
    b1_wh = b1[:, :, 2:4]
    b1_wh_half = b1_wh/2.
    b1_mins = b1_xy - b1_wh_half
    b1_maxes = b1_xy + b1_wh_half

    # Expand dim to apply broadcasting.
    b2 = nd.expand_dims(b2, 0)
    b2_xy = b2[:, :, :2]
    b2_wh = b2[:, :, 2:4]
    b2_wh_half = b2_wh/2.
    b2_mins = b2_xy - b2_wh_half
    b2_maxes = b2_xy + b2_wh_half

    intersect_mins = nd.maximum(b1_mins, b2_mins)
    intersect_maxes = nd.minimum(b1_maxes, b2_maxes)
    intersect_wh = nd.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_wh[:, :, 0] * intersect_wh[:, :, 1]
    b1_area = b1_wh[:, :, 0] * b1_wh[:, :, 1]
    b2_area = b2_wh[:, :, 0] * b2_wh[:, :, 1]
    iou = intersect_area / (b1_area + b2_area - intersect_area)

    return iou
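A minimal usage sketch (not from the source) showing the broadcast: every box in `b1` is scored against every box in `b2`.

from mxnet import nd

b1 = nd.array([[50., 50., 20., 20.],
               [10., 10., 10., 10.]])          # 2 boxes, (x, y, w, h)
b2 = nd.array([[50., 50., 20., 20.],
               [55., 55., 20., 20.],
               [200., 200., 10., 10.]])        # 3 boxes, (x, y, w, h)

iou = box_iou(b1, b2)
print(iou.shape)                               # (2, 3)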
Example #24
def calIOU(anchor, gt):
    assert len(anchor.shape) in (1,2,3)
    assert len(gt.shape) in (1,2,3)
    
    anchor = anchor.reshape((-1,4))
    if len(gt.shape) < 3:
        gt = gt.reshape((1,1,4)) if len(gt.shape) == 1 else nd.expand_dims(gt, axis=0)
    anchor = nd.expand_dims(anchor, axis=1)
    gt = nd.expand_dims(gt, axis=1)
    
    max_tl = nd.maximum(nd.take(anchor, nd.array([0,1]), axis=-1), nd.take(gt, nd.array([0,1]), axis=-1))
    min_br = nd.minimum(nd.take(anchor, nd.array([2,3]), axis=-1), nd.take(gt, nd.array([2,3]), axis=-1))
    
    area = nd.prod(min_br-max_tl, axis=-1)
    i = nd.where((max_tl >= min_br).sum(axis=-1), nd.zeros_like(area), area)
    
    anchor_area = nd.prod(anchor[:,:,2:]-anchor[:,:,:2], axis=-1)
    gt_area = nd.prod(gt[:,:,:,2:]-gt[:,:,:,:2], axis=-1)
    total_area = anchor_area + gt_area - i
    iou = i / total_area
    
    return iou
Example #25
def intersect(box_a, box_b):
    """
    We resize both tensors to [A,B,2] without new malloc:
    [A,2] -> [A,1,2] -> [A,B,2]
    [B,2] -> [1,B,2] -> [A,B,2]
    Then we compute the area of intersect between box_a and box_b.
    Args:
      box_a: (tensor) bounding boxes, Shape: [A,4].
      box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
      (tensor) intersection area, Shape: [A,B].
    """

    A = box_a.shape[0]
    B = box_b.shape[0]
    max_xy = nd.minimum(
        box_a[:, 2:].expand_dims(axis=1).repeat(axis=1, repeats=B),
        box_b[:, 2:].expand_dims(axis=0).repeat(axis=0, repeats=A))
    min_xy = nd.maximum(
        box_a[:, :2].expand_dims(axis=1).repeat(axis=1, repeats=B),
        box_b[:, :2].expand_dims(axis=0).repeat(axis=0, repeats=A))
    inter = nd.clip((max_xy - min_xy), 0, np.nan)
    return inter[:, :, 0] * inter[:, :, 1]
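A minimal usage sketch (not from the source); `np` is assumed to be available in the module because `intersect` uses `np.nan`:

import numpy as np
from mxnet import nd

box_a = nd.array([[0., 0., 10., 10.],
                  [5., 5., 15., 15.]])         # A = 2 boxes, corner format
box_b = nd.array([[0., 0., 10., 10.],
                  [8., 8., 12., 12.],
                  [20., 20., 30., 30.]])       # B = 3 boxes

print(intersect(box_a, box_b).asnumpy())       # (2, 3) pairwise intersection areas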
Example #26
    def update(self, obs, returns, actions, advantages, cliprange_now,
               entropy_coeff):
        advantages = nd.array(advantages)
        actions = nd.array(actions)
        returns = nd.array(returns)

        with autograd.record():
            _, old_logits = self.oldpi.forward(obs)
            old_logp = self.oldpi.logp(old_logits, actions)

            new_vpred, new_logits = self.pi.forward(obs)
            new_vpred = new_vpred.reshape(new_vpred.shape[:-1])
            new_logp = self.pi.logp(new_logits, actions)

            # Action loss
            ratio = nd.exp(new_logp - old_logp)
            surr1 = ratio * advantages
            surr2 = nd.clip(ratio, 1.0 - cliprange_now, 1.0 + cliprange_now) * advantages
            actor_loss = -nd.mean(nd.minimum(surr1, surr2))

            # Value loss
            vf_loss1 = nd.square(new_vpred - returns)
            vf_loss = nd.mean(vf_loss1)

            # Entropy term
            entropy = self.pi.entropy(new_logits)
            mean_entropy = nd.mean(entropy)
            ent_loss = (-entropy_coeff) * mean_entropy

            loss = vf_loss + actor_loss + ent_loss

        # Compute gradients and updates
        loss.backward()
        self.trainer.step(1) # Loss are already normalized

        return actor_loss.asscalar(), vf_loss.asscalar(), ent_loss.asscalar()
Example #27
            def _go_above(x, tau):
                lower = nd.min(x, axis=0)
                lower = nd.minimum(lower, node._box._min_list.data())
                upper = nd.max(x, axis=0)
                upper = nd.maximum(upper, node._box._max_list.data())

                el = nd.maximum(node._box._min_list.data() - nd.min(x, axis=0),
                                0)
                eu = nd.maximum(
                    nd.max(x, axis=0) - node._box._max_list.data(), 0)
                extent = nd.sum(el + eu)
                dim = nd.random.multinomial((el + eu) / extent)

                btm = el[dim]
                top = eu[dim]
                split = nd.random.multinomial(
                    nd.concat(btm, top, dim=0) / (btm + top))
                if (split == 0):
                    split = nd.random.uniform(lower[dim],
                                              node._box._min_list.data()[dim])
                elif (split == 1):
                    split = nd.random.uniform(node._box._max_list.data()[dim],
                                              upper[dim])

                with self.name_scope():
                    p_node = self._new_node(
                        parent=node._box._parent,
                        min_list=lower,
                        max_list=upper,
                        tau=tau,
                        decision=lambda: Decision(
                            split=split, dim=dim, gate=self._new_gate),
                        embedding=node._embedding.data())
                    s_node = self._new_node(parent=p_node,
                                            embedding=node._embedding.data())
                    node._box._parent = p_node

                    if (split < node._box._min_list.data()[dim]):
                        # current node is right
                        l_node = s_node
                        r_node = node

                    elif (split > node._box._max_list.data()[dim]):
                        # current node is left
                        l_node = node
                        r_node = s_node

                    self._structure[p_node] = {l_node: -1, r_node: 1}

                    # p_node's parent also needs to reference p_node and the other child
                    if (p_node._box._parent is not None):

                        if (self._structure[p_node._box._parent][node] == -1):
                            self._structure[p_node._box._parent][p_node] = -1

                        elif (self._structure[p_node._box._parent][node] == 1):
                            self._structure[p_node._box._parent][p_node] = 1

                        self._structure[p_node._box._parent].pop(node)

                    elif (p_node._box._parent is None):
                        self._structure.move_to_end(p_node, last=False)

                    self._weightlayer.add(*[p_node._box, s_node._box])
                    self._routerlayer.add(*[p_node._decision])
                    self._embeddlayer.add(*[p_node, s_node])

                decision = p_node._decision.forward(x, crisp=True)
                _shard(decision, x, _extend(l_node), _extend(r_node))
Example #28
    def update(self):
        state = nd.array([t.state for t in self.buffer], ctx=self.ctx)
        action = nd.array([t.action for t in self.buffer], ctx=self.ctx)
        reward = [t.reward for t in self.buffer]
        # next_state = nd.array([t.next_state for t in self.buffer], ctx=self.ctx)
        old_action_log_prob = nd.array([t.a_log_prob for t in self.buffer],
                                       ctx=self.ctx)

        R = 0
        Gt = []
        for r in reward[::-1]:
            R = r + self.gamma * R
            Gt.insert(0, R)
        Gt = nd.array(Gt, ctx=self.ctx)
        # sample 'ppo_update_time' times
        # sample 'batch_size' samples every time
        for i in range(self.ppo_update_times):
            assert len(self.buffer) >= self.batch_size
            sample_index = random.sample(range(len(self.buffer)),
                                         self.batch_size)
            for index in sample_index:
                # optimize the actor network
                with autograd.record():
                    Gt_index = Gt[index]
                    V = self.critic_network(state[index].reshape(1,
                                                                 -1)).detach()
                    advantage = (Gt_index - V)

                    all_action_prob = self.actor_network(state[index].reshape(
                        1, -1))
                    action_prob = nd.pick(all_action_prob, action[index])

                    ratio = action_prob / old_action_log_prob[index]
                    surr1 = ratio * advantage
                    surr2 = nd.clip(ratio, 1 - self.clip_param,
                                    1 + self.clip_param) * advantage
                    action_loss = -nd.mean(nd.minimum(surr1,
                                                      surr2))  # attention
                self.actor_network.collect_params().zero_grad()
                action_loss.backward()
                actor_network_params = [
                    p.data()
                    for p in self.actor_network.collect_params().values()
                ]
                gb.grad_clipping(actor_network_params,
                                 theta=self.clip_param,
                                 ctx=self.ctx)
                self.actor_optimizer.step(1)

                # optimize the critic network
                with autograd.record():
                    Gt_index = Gt[index]
                    V = self.critic_network(state[index].reshape(1, -1))
                    loss = gloss.L2Loss()
                    value_loss = nd.mean(loss(Gt_index, V))
                self.critic_network.collect_params().zero_grad()
                value_loss.backward()
                critic_network_params = [
                    p.data()
                    for p in self.critic_network.collect_params().values()
                ]
                gb.grad_clipping(critic_network_params,
                                 theta=self.clip_param,
                                 ctx=self.ctx)
                self.critic_optimizer.step(1)

                self.training_step += 1
        # clear buffer
        del self.buffer[:]
Example #29
    def generate_targets(self, img, boxes):
        """
        img : [H, W, 3]
        boxes : [N, 5]
        """
        rh, rw, _ = img.shape
        rx = nd.arange(0, rw).reshape((1, -1))
        ry = nd.arange(0, rh).reshape((-1, 1))
        sx = nd.tile(rx, reps=(rh, 1))
        sy = nd.tile(ry, reps=(1, rw))

        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        boxes = boxes[nd.argsort(areas)]
        boxes = nd.concat(nd.zeros((1, 5)), boxes, dim=0) # for gt assign confusion
        x0, y0, x1, y1, cls = nd.split(boxes, num_outputs=5, axis=-1, squeeze_axis=True)
        n = boxes.shape[0]

        # [H, W, N]
        of_l = sx.reshape(-2, 1) - nd.expand_dims(nd.expand_dims(x0, axis=0), axis=0)
        of_t = sy.reshape(-2, 1) - nd.expand_dims(nd.expand_dims(y0, axis=0), axis=0)
        of_r = -(sx.reshape(-2, 1) - nd.expand_dims(nd.expand_dims(x1, axis=0), axis=0))
        of_b = -(sy.reshape(-2, 1) - nd.expand_dims(nd.expand_dims(y1, axis=0), axis=0))

        # [H, W, N]
        eps = 1e-5
        ctr = (nd.minimum(of_l, of_r) / nd.maximum(of_l, of_r)) * \
              (nd.minimum(of_t, of_b) / nd.maximum(of_t, of_b) + eps)
        ctr = nd.sqrt(nd.abs(ctr))
        ctr[:, :, 0] = 0

        # [H, W, N, 4]
        offsets = nd.concat(of_l.reshape(-2, 1), of_t.reshape(-2, 1),
                            of_r.reshape(-2, 1), of_b.reshape(-2, 1), dim=-1)

        # fh = int(np.ceil(((rh + 1) / 2) // 2 / 2))
        # fw = int(np.ceil(((rw + 1) / 2) // 2 / 2))
        fh = int(np.ceil(np.ceil(np.ceil(rh / 2) / 2) / 2))
        fw = int(np.ceil(np.ceil(np.ceil(rw / 2) / 2) / 2))

        fm_list = []
        for i in range(self._stages):
            fm_list.append((fh, fw))
            fh = int(np.ceil(fh / 2))
            fw = int(np.ceil(fw / 2))
        fm_list = fm_list[::-1]
        cls_targets = []
        ctr_targets = []
        box_targets = []
        cor_targets = []
        stride = self._stride
        for i in range(self._stages):
            fh, fw = fm_list[i]
            cls_target = nd.zeros((fh, fw))
            box_target = nd.zeros((fh, fw, 4))
            ctr_target = nd.zeros((fh, fw))

            cx = nd.arange(0, fw).reshape((1, -1))
            cy = nd.arange(0, fh).reshape((-1, 1))
            sx = nd.tile(cx, reps=(fh, 1))
            sy = nd.tile(cy, reps=(1, fw))
            syx = nd.stack(sy.reshape(-1), sx.reshape(-1)).transpose().astype('int32')
            # bugs in this type
            # bx = sxy[:, 0] * stride + nd.floor(sxy[:, 0] / 2).astype(np.int32)
            # by = sxy[:, 1] * stride + nd.floor(sxy[:, 1] / 2).astype(np.int32)
            by = syx[:, 0] * stride
            bx = syx[:, 1] * stride
            cor_targets.append(nd.stack(bx, by, axis=1))

            # [FH*FW, N, 4]
            of_byx = offsets[by, bx]
            # of_byx = nd.gather_nd(offsets, indices=byx.transpose())
            min_vr, max_vr = self._valid_range[i]
            # [FH*FW, N]
            is_in_box = nd.prod(of_byx > 0, axis=-1)
            is_valid_area = (of_byx.max(axis=-1) >= min_vr) * (of_byx.max(axis=-1) <= max_vr)
            # [FH*FW, N]
            valid_pos = nd.elemwise_mul(is_in_box, is_valid_area)
            of_valid = nd.zeros((fh, fw, n))
            of_valid[syx[:, 0], syx[:, 1], :] = valid_pos # 1, 0
            of_valid[:, :, 0] = 0
            # [FH, FW]
            gt_inds = nd.argmax(of_valid, axis=-1)
            # box targets
            box_target[syx[:, 0], syx[:, 1]] = boxes[gt_inds[syx[:, 0], syx[:, 1]], :4]
            box_target = box_target.reshape(-1, 4)
            # cls targets
            cls_target[syx[:, 0], syx[:, 1]] = cls[gt_inds[syx[:, 0], syx[:, 1]]]
            cls_target = cls_target.reshape(-1)
            # ctr targets
            ctr_target[syx[:, 0], syx[:, 1]] = ctr[by, bx, gt_inds[syx[:, 0], syx[:, 1]]]
            ctr_target = ctr_target.reshape(-1)
            box_targets.append(box_target)
            cls_targets.append(cls_target)
            ctr_targets.append(ctr_target)
            stride = int(stride / 2)
        box_targets = nd.concat(*box_targets, dim=0)
        cls_targets = nd.concat(*cls_targets, dim=0)
        ctr_targets = nd.concat(*ctr_targets, dim=0)
        cor_targets = nd.concat(*cor_targets, dim=0)
        cor_targets = cor_targets.astype('float32')

        return cls_targets, ctr_targets, box_targets, cor_targets
Example #30
def BReLU(x):
    return nd.minimum(1., nd.maximum(0., x))
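A minimal usage sketch (not from the source): the bounded ReLU clamps values into [0, 1].

from mxnet import nd

x = nd.array([-1.5, 0.3, 0.9, 2.0])
print(BReLU(x).asnumpy())   # [0.  0.3 0.9 1. ]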
Example #31
tree._grow(nd.array([[1, 1]]))

# %%

from Node import Node

node = Node()

node.collect_params()

# %%

a = nd.array([[1, 2], [3, 4], [-10, -10]])
a
upper = nd.max(a, axis=0)
lower = nd.min(a, axis=0)

e = nd.random.exponential(1 / nd.sum(upper - lower))

(upper, lower, e)

nd.random.multinomial(nd.array([0.5, 0.5]), 10)

# %%

if (nd.sum(nd.array([0, 0])) == 0):
    print("yay")

nd.minimum(nd.array([4, 5]), nd.array([0, 6]))
Example #32
    noise_batch = noise_std * nd.random.normal(shape=(batch_size,
                                                      num_measurements))
    x_batch = x_batch.reshape((batch_size, 784))
    y_batch = nd.dot(x_batch, A) + noise_batch

    ########################
    ### Lasso
    ########################
    x_hat_batch_Lasso = nd.zeros([batch_size, 784])
    lasso_est = Lasso(alpha=lmbd)
    for i in range(batch_size):
        y_val = y_batch[i]
        lasso_est.fit(A.T.asnumpy(), y_val.reshape(num_measurements).asnumpy())
        x_hat_lasso = nd.array(lasso_est.coef_)
        x_hat_lasso = nd.reshape(x_hat_lasso, [-1])
        x_hat_lasso = nd.maximum(nd.minimum(x_hat_lasso, 1), 0)
        x_hat_batch_Lasso[i] = x_hat_lasso

    ########################
    ### OMP Algorithm
    ########################
    omp_est = OrthogonalMatchingPursuit(n_nonzero_coefs=num_measurements // 2)
    x_hat_batch_OMP = nd.zeros([batch_size, 784])
    for i in range(batch_size):
        y_val = y_batch[i]
        omp_est.fit(A.T.asnumpy(), y_val.reshape(num_measurements).asnumpy())
        x_hat_OMP = nd.array(omp_est.coef_)
        x_hat_OMP = nd.reshape(x_hat_OMP, [-1])
        x_hat_OMP = nd.maximum(nd.minimum(x_hat_OMP, 1), 0)
        x_hat_batch_OMP[i] = x_hat_OMP