示例#1
0
    def add_split(x, leaf, p_tau):
        """Sample a hyperplane that separates ``x`` and register it as a split.

        The centre and radius are read from ``leaf.parent['node']``.  Random
        candidates are drawn until one is found whose first direction has a
        positive last component and for which the signs of ``x @ weight +
        bias`` do not all agree.  The accepted hyperplane is wrapped in a
        ``Split``, added to ``tree.splits`` and stored as the decision of both
        the parent node and ``leaf``.

        Args:
            x: data points handed to the split; the last axis is the feature
                axis (its size fixes the dimension of the sampled directions).
            leaf: leaf whose ``parent`` mapping supplies 'node', 'decision'
                and 'side'.
            p_tau: value added to a fresh exponential draw to form ``tau``.
        """
        parent_node = leaf.parent['node']
        center = parent_node.center.data()
        radius = parent_node.radius.data()
        tau = p_tau + nd.random.exponential(radius**-1)

        while True:
            # Two random unit vectors: row 0 becomes the normal, row 1 is used
            # to place the offset.
            directions = nd.random.normal(shape=(2, x.shape[-1]))
            directions = directions / nd.norm(
                directions, axis=-1, keepdims=True)
            offset = nd.random.uniform(low=nd.array([0]), high=radius)
            offset = offset * nd.random.uniform()**(1 / 3)

            # Reject candidates whose normal has a non-positive last component.
            if not (nd.sign(directions[0][-1]) > 0):
                continue

            weight = directions[0]
            bias = nd.dot(directions[0],
                          -1 * offset * (directions[1] + center))
            y = nd.sign(nd.dot(x, weight) + bias)
            # Accept only when the signs are not unanimous.
            if nd.abs(nd.sum(y)) != len(y):
                break

        split = Split(weight=weight,
                      bias=bias,
                      sharpness=3 / radius,
                      tau=tau,
                      decision=leaf.parent['decision'],
                      side=leaf.parent['side'])
        tree.splits.add(split)
        parent_node.child['decision'] = split
        leaf.parent['decision'] = split
示例#2
0
 def forward(self, x_i, x_j, t_i, t_j):
     """Return the element-wise pairwise ranking loss for two scored inputs.

     Both inputs are passed through ``self.scorer`` and the loss is built from
     the score difference ``s_i - s_j`` and the sign of the target difference.

     Args:
         x_i, x_j: inputs fed to ``self.scorer``.
         t_i, t_j: targets associated with ``x_i`` and ``x_j``.

     Returns:
         The un-reduced loss (no mean is taken here).
     """
     s_i = self.scorer(x_i)
     s_j = self.scorer(x_j)
     s_diff = s_i - s_j
     if self.loss_type == 'hinge':
         loss = nd.relu(1.0 - s_diff * nd.sign(t_i - t_j))
     else:  # more loss_types can be defined here
         # softrelu(z) = log(1 + exp(z)).  Using the operator instead of
         # spelling out nd.log(1 + nd.exp(-s_diff)) avoids exp() overflowing
         # to inf when s_diff is strongly negative.
         loss = (nd.sign(t_j - t_i) * s_diff / 2. +
                 nd.Activation(-s_diff, act_type='softrelu'))
     return loss
示例#3
0
def predict_auc(net, dataloader, length):
    """Print accuracy and ROC-AUC of ``net`` over ``dataloader`` and plot the ROC.

    Hard predictions are ``(sign(output) + 1) / 2``; the scores used for the
    ROC curve come from ``logistic(output)``.  The curve is added to the
    current matplotlib axes via ``plt.plot``.

    Args:
        net: callable applied to each input batch.
        dataloader: iterable of ``(input, label)`` batches.
        length: denominator used for the printed accuracy.
    """
    correct = 0.0
    score_batches = []
    label_batches = []

    for a, label in dataloader:
        a = a.as_in_context(model_ctx)
        label = label.as_in_context(model_ctx)
        output = net(a)
        hard = (nd.sign(output) + 1) / 2
        score_batches.append(logistic(output).asnumpy())
        label_batches.append(label.asnumpy())
        correct += nd.sum(hard == label)

    n_correct = correct.asscalar()
    print("Accuracy: %0.3f (%s/%s)" % (n_correct / length, n_correct, length))

    from sklearn.metrics import roc_curve, auc
    # Flatten the per-batch arrays, keeping the first column of every row.
    ytrue = [row[0] for batch in label_batches for row in batch]
    ypred = [row[0] for batch in score_batches for row in batch]
    fpr, tpr, _ = roc_curve(ytrue, ypred, pos_label=1)
    roc_auc = auc(fpr, tpr)
    curve_label = 'ROC fold %d (AUC = %0.2f)' % (1, roc_auc)
    plt.plot(fpr, tpr, lw=1, alpha=0.3, label=curve_label)
    print('AUC: %.5f' % roc_auc)
示例#4
0
def get_final_preds(batch_heatmaps, center, scale):
    """Turn heatmaps into coordinates via ``get_max_pred`` + ``transform_preds``.

    Each peak returned by ``get_max_pred`` is nudged by a quarter pixel
    towards the larger neighbouring heatmap value (per axis) when the rounded
    peak lies strictly inside the border margin, then every sample is mapped
    through ``transform_preds`` with its ``center`` and ``scale``.

    Args:
        batch_heatmaps: array indexed as [sample][joint][row][col]; axes 2 and
            3 are read as height and width.
        center: per-sample centre passed to ``transform_preds``.
        scale: per-sample scale passed to ``transform_preds``.

    Returns:
        ``(preds, maxvals)`` where ``maxvals`` comes from ``get_max_pred``.
    """
    coords, maxvals = get_max_pred(batch_heatmaps)

    heatmap_height, heatmap_width = (batch_heatmaps.shape[2],
                                     batch_heatmaps.shape[3])
    num_samples = coords.shape[0]
    num_joints = coords.shape[1]

    # Quarter-pixel refinement of every peak.
    for n in range(num_samples):
        for p in range(num_joints):
            hm = batch_heatmaps[n][p]
            px = int(nd.floor(coords[n][p][0] + 0.5).asscalar())
            py = int(nd.floor(coords[n][p][1] + 0.5).asscalar())
            in_margin = (1 < px < heatmap_width - 1
                         and 1 < py < heatmap_height - 1)
            if not in_margin:
                continue
            grad_x = hm[py][px + 1] - hm[py][px - 1]
            grad_y = hm[py + 1][px] - hm[py - 1][px]
            coords[n][p] += nd.sign(nd.concat(grad_x, grad_y, dim=0)) * .25

    # Map heatmap coordinates back through the per-sample transform.
    preds = nd.zeros_like(coords)
    hm_size = [heatmap_width, heatmap_height]
    for i in range(num_samples):
        preds[i] = transform_preds(coords[i], center[i], scale[i], hm_size)

    return preds, maxvals
 def implement_1(self, x, label):
     '''
     Compute angular-margin logits following the paper formulation:

         phi(theta) = (-1)^k * cos(m * theta) - 2k,
         theta in [k*pi/m, (k+1)*pi/m]

     The target-class logit |x| * cos(theta) is blended with
     |x| * phi(theta) using the factor 1 / (1 + lambda); other classes keep
     |x| * cos(theta).

     x: input features, one row per sample (reduced over axis 1 for |x|).
     label: class indices, one-hot encoded against the number of classes.
     returns: logits with one column per row of the weight matrix.
     '''
     #  weight normalize (rows scaled to unit L2 norm)
     with x.context:
         w = self.weight.data()
     w_norm = w / nd.sqrt(nd.sum(nd.power(w, 2), axis=1)).reshape((-1, 1))
     #  cos_theta = x'w/|x|. note: |w| = 1
     x_norm = nd.sqrt(nd.sum(nd.power(x, 2), axis=1))
     cos_theta = nd.dot(x, w_norm, transpose_b=True)
     cos_theta = cos_theta / x_norm.reshape((-1, 1))
     #  clip so arccos below never sees values outside [-1, 1]
     cos_theta = nd.clip(cos_theta, -1, 1)
     #  cos_m_theta = cos(m * theta)
     cos_m_theta = self.margin_cos[self.margin](cos_theta)
     #  k = floor(m * theta / pi): index of the interval
     #  [k*pi/m, (k+1)*pi/m] containing theta.  The previous nd.sign() call
     #  could only yield 0 or 1, which is wrong once theta >= 2*pi/m.
     #  Computed outside the autograd graph so k acts as a constant.
     with mx.autograd.pause():
         theta = nd.arccos(cos_theta)
         k = nd.floor(self.margin * theta / math.pi)
     #  i=j is phi_theta and i!=j is cos_theta
     phi_theta = ((-1)**k) * cos_m_theta - 2 * k
     x_norm_phi_theta = x_norm.reshape((-1, 1)) * phi_theta
     x_norm_cos_theta = x_norm.reshape((-1, 1)) * cos_theta
     #  i=j index
     with mx.autograd.pause():
         index = nd.one_hot(label, x_norm_phi_theta.shape[1])
     #  output
     with mx.autograd.pause():
         lamb = self.__get_lambda()
     output = x_norm_cos_theta * 1.0
     output = output - x_norm_cos_theta * index / (1 + lamb)
     output = output + x_norm_phi_theta * index / (1 + lamb)
     return output
示例#6
0
def get_final_preds(batch_heatmaps, center, scale):
    """Convert heatmaps to final coordinates.

    Peaks come from ``get_max_pred``.  A peak whose rounded position
    satisfies ``1 < px < width - 1`` and ``1 < py < height - 1`` is shifted
    by 0.25 in the direction of the sign of the horizontal / vertical
    heatmap difference around it.  The refined coordinates of each sample
    are then passed to ``transform_preds`` together with that sample's
    ``center`` and ``scale``.

    Returns:
        Tuple ``(preds, maxvals)``; ``maxvals`` is forwarded unchanged from
        ``get_max_pred``.
    """
    coords, maxvals = get_max_pred(batch_heatmaps)

    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]
    batch_size, joint_count = coords.shape[0], coords.shape[1]

    # post-processing: quarter-pixel shift towards the higher neighbour
    for n in range(batch_size):
        for p in range(joint_count):
            px = int(nd.floor(coords[n][p][0] + 0.5).asscalar())
            py = int(nd.floor(coords[n][p][1] + 0.5).asscalar())
            x_ok = 1 < px < heatmap_width - 1
            y_ok = 1 < py < heatmap_height - 1
            if x_ok and y_ok:
                hm = batch_heatmaps[n][p]
                horizontal = hm[py][px + 1] - hm[py][px - 1]
                vertical = hm[py + 1][px] - hm[py - 1][px]
                diff = nd.concat(horizontal, vertical, dim=0)
                coords[n][p] += nd.sign(diff) * .25

    # Transform back
    preds = nd.zeros_like(coords)
    for i in range(batch_size):
        preds[i] = transform_preds(coords[i], center[i], scale[i],
                                   [heatmap_width, heatmap_height])

    return preds, maxvals
示例#7
0
    def tesselate(x, leaf, p_tau):
        """Recursively split the points ``x`` held by ``leaf``.

        Stops when fewer than two points remain.  Otherwise a split is added
        through ``add_split``, the points are ordered by the sign of the split
        response and cut into a left and a right part, a sibling ``Leaf`` is
        created for the right part, ``add_node`` is called for both parts, and
        the procedure recurses on each part with the split's ``tau``.

        Args:
            x: points to partition (first axis indexes points).
            leaf: leaf currently owning ``x``; its ``parent`` mapping is
                updated in place.
            p_tau: value forwarded to ``add_split``.
        """
        if len(x) < 2:
            return

        add_split(x, leaf, p_tau)
        split = leaf.parent['decision']
        node = leaf.parent['node']

        # Order the points by which side of the split they fall on.
        signs = nd.sign(split.split(x))
        perm = nd.argsort(signs, axis=None)
        sorted_x = x[perm, :]
        signs = signs[perm, :]

        # Locate the cut index between the two sides.
        ranked = nd.argsort(signs, axis=0) * signs
        cut = nd.argsort(ranked, axis=None, dtype='int32')[0].asscalar() + 1
        left_pts = sorted_x[0:cut]
        right_pts = sorted_x[cut:]

        leaf.parent['side'] = 0
        sibling = Leaf(layer=tree.layer_initializer(),
                       node=leaf.parent['node'],
                       decision=leaf.parent['decision'],
                       side=1)
        tree.leaves.add(sibling)

        add_node(left_pts, leaf)
        add_node(right_pts, sibling)
        # ``node`` was captured before add_node; the leaves' parent nodes are
        # re-read here and linked as its children.
        node.child['left'] = leaf.parent['node']
        node.child['right'] = sibling.parent['node']

        tesselate(left_pts, leaf, split.tau.data())
        tesselate(right_pts, sibling, split.tau.data())
示例#8
0
def heatmap_to_coord_alpha_pose(hms, boxes):
    """Convert heatmaps to coordinates using per-sample bounding boxes.

    ``boxes`` is read either as ``boxes[:, 0, 0:4]`` (when its second axis
    has length 1) or as ``boxes[:, 0:4]`` (second axis must then have length
    4).  Columns (0, 1) and (2, 3) form the two points handed to
    ``transformBoxInvert``.  Peaks from ``get_max_pred`` receive a
    quarter-pixel shift towards the larger neighbouring heatmap value before
    being transformed.

    Returns:
        ``(preds, maxvals)`` with ``maxvals`` as returned by ``get_max_pred``.
    """
    hm_h, hm_w = hms.shape[2], hms.shape[3]
    coords, maxvals = get_max_pred(hms)

    if boxes.shape[1] == 1:
        first_pt, second_pt = boxes[:, 0, (0, 1)], boxes[:, 0, (2, 3)]
    else:
        assert boxes.shape[1] == 4
        first_pt, second_pt = boxes[:, (0, 1)], boxes[:, (2, 3)]
    pt1 = mx.nd.array(first_pt, dtype=hms.dtype)
    pt2 = mx.nd.array(second_pt, dtype=hms.dtype)

    # Quarter-pixel refinement of every peak inside the border margin.
    for n in range(coords.shape[0]):
        for p in range(coords.shape[1]):
            px = int(nd.floor(coords[n][p][0] + 0.5).asscalar())
            py = int(nd.floor(coords[n][p][1] + 0.5).asscalar())
            if not (1 < px < hm_w - 1 and 1 < py < hm_h - 1):
                continue
            hm = hms[n][p]
            d_col = hm[py][px + 1] - hm[py][px - 1]
            d_row = hm[py + 1][px] - hm[py - 1][px]
            coords[n][p] += nd.sign(nd.concat(d_col, d_row, dim=0)) * .25

    preds = nd.zeros_like(coords)
    for i in range(hms.shape[0]):
        box_a, box_b = pt1[i], pt2[i]
        for j in range(hms.shape[1]):
            preds[i][j] = transformBoxInvert(coords[i][j], box_a, box_b,
                                             hm_h, hm_w)

    return preds, maxvals
示例#9
0
def get_final_preds(batch_heatmaps, center, scale):
    """Convert heatmaps to coordinates with an inline centre/scale transform.

    Peaks are taken from gluoncv's ``get_max_pred`` and shifted by a quarter
    pixel towards the larger neighbouring heatmap value when they lie inside
    the border margin.  Each coordinate is then mapped as
    ``scale * 2 * (coord / heatmap_size) + center - scale`` per axis.

    Args:
        batch_heatmaps: array indexed as [sample][joint][row][col].
        center: per-sample (x, y) centre.
        scale: per-sample (x, y) scale.

    Returns:
        ``(preds, maxvals)`` with ``maxvals`` as returned by ``get_max_pred``.
    """
    from gluoncv.data.transforms.pose import get_max_pred
    coords, maxvals = get_max_pred(batch_heatmaps)

    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]
    sample_count = coords.shape[0]

    # post-processing
    for n in range(sample_count):
        for p in range(coords.shape[1]):
            px = int(nd.floor(coords[n][p][0] + 0.5).asscalar())
            py = int(nd.floor(coords[n][p][1] + 0.5).asscalar())
            if not (1 < px < heatmap_width - 1):
                continue
            if not (1 < py < heatmap_height - 1):
                continue
            hm = batch_heatmaps[n][p]
            diff = nd.concat(hm[py][px + 1] - hm[py][px - 1],
                             hm[py + 1][px] - hm[py - 1][px],
                             dim=0)
            coords[n][p] += nd.sign(diff) * .25

    preds = nd.zeros_like(coords)

    # Transform back
    for i in range(sample_count):
        sample_coords = coords[i]
        w_ratio = sample_coords[:, 0] / heatmap_width
        h_ratio = sample_coords[:, 1] / heatmap_height
        scale_x, scale_y = scale[i][0], scale[i][1]
        preds[i][:, 0] = scale_x * 2 * w_ratio + center[i][0] - scale_x
        preds[i][:, 1] = scale_y * 2 * h_ratio + center[i][1] - scale_y

    return preds, maxvals
 def implement_0(self, x, label):
     '''
     Compute angular-margin logits, following the sphereface code of caffe.

     The weight rows are normalised to unit L2 norm and written back into
     the array returned by self.weight.data().  phi(theta) is built from the
     polynomial 8c^4 - 8c^2 + 1 (c = cos(theta)) and sign-based corrections;
     the target-class logit is replaced by |x| * phi(theta) and then blended
     with the plain product x.w using lambda.

     x: input features, one row per sample (reduced over axis 1 for |x|).
     label: class indices, one-hot encoded against the number of classes.
     returns: (margin_logits + lambda * x.w) / (1 + lambda)
     '''
     #  weight normalize (in place, outside the autograd graph)
     with x.context:
         w = self.weight.data()
     with mx.autograd.pause():
         w_norm = w / nd.sqrt(nd.sum(nd.power(w, 2), axis=1)).reshape(
             (-1, 1))
         w[:] = w_norm
     #  x_norm = |x|
     x_norm = nd.sqrt(nd.sum(nd.power(x, 2), axis=1))
     #  x.w is needed for cos_theta, for the non-target logits and for the
     #  lambda blend; compute the matrix product once and reuse it.
     xw = nd.dot(x, w, transpose_b=True)
     #  cos_theta = x'w/|x|. note: |w| = 1
     cos_theta = xw / x_norm.reshape((-1, 1))
     #  cos_theta_quadratic & cos_theta_quartic
     cos_theta_quadratic = cos_theta**2
     cos_theta_quartic = cos_theta**4
     #  the sign terms are treated as constants by autograd
     with mx.autograd.pause():
         #  sign_0 = sign(cos_theta)
         sign_0 = nd.sign(cos_theta)
         #  sign_3 = sign_0 * sign(2 * cos_theta_quadratic - 1)
         sign_3 = sign_0 * nd.sign(2 * cos_theta_quadratic - 1)
         #  sign_4 = 2 * sign_0 + sign_3 - 3
         sign_4 = 2 * sign_0 + sign_3 - 3
     #  phi_theta = sign_3 * (8c^4 - 8c^2 + 1) + sign_4
     phi_theta = sign_3 * (8 * cos_theta_quartic - 8 * cos_theta_quadratic +
                           1) + sign_4
     x_norm_phi_theta = x_norm.reshape((-1, 1)) * phi_theta
     #  i=j index
     with mx.autograd.pause():
         index = nd.one_hot(label, x_norm_phi_theta.shape[1])
     #  output
     with mx.autograd.pause():
         lamb = self.__get_lambda()
     #  target class gets |x| * phi(theta); all others keep x.w
     output2 = xw * (1.0 - index) + x_norm_phi_theta * index
     output3 = (output2 + lamb * xw) / (1 + lamb)
     return output3
示例#11
0
def SGD(params, lr):
    """Apply one in-place gradient step to every entry of ``params``.

    Odd-indexed entries get a plain step.  Even-indexed entries consult the
    module-level ``w_mask``: when ``w_mask[idx]`` is a list the step adds
    ``0.01 * sign(grad)`` to the gradient, otherwise the stepped parameter is
    multiplied by ``w_mask[idx]``.

    Args:
        params: sequence of arrays exposing ``.grad``.
        lr: learning rate.
    """
    lambdaval = .01
    for idx, param in enumerate(params):
        if idx % 2 != 0:
            param[:] = param - lr * param.grad
            continue

        if isinstance(w_mask[idx], list):
            penalised_grad = param.grad + lambdaval * nd.sign(param.grad)
            param[:] = param - lr * penalised_grad
        else:
            param[:] = (param - lr * param.grad) * w_mask[idx]
示例#12
0
def trim_attack(epoch, v, net, lr, f):
    """Local model poisoning attack against Trimmed-mean.

    The updates in ``v`` are concatenated along axis 1.  For each coordinate
    the sign of the summed update selects either the per-coordinate minimum
    (positive sum) or maximum (negative sum); the first ``f`` entries of ``v``
    are overwritten with that value divided or multiplied by a random factor
    in [1, 2), depending on the sign of ``direction * directed_dim``.

    ``epoch``, ``net`` and ``lr`` are not used here.

    Returns:
        ``v`` itself, modified in place.
    """
    vi_shape = v[0].shape
    stacked = nd.concat(*v, dim=1)
    maximum_dim = nd.max(stacked, axis=1).reshape(vi_shape)
    minimum_dim = nd.min(stacked, axis=1).reshape(vi_shape)
    direction = nd.sign(nd.sum(stacked, axis=-1, keepdims=True))

    pick_min = direction > 0
    pick_max = direction < 0
    directed_dim = pick_min * minimum_dim + pick_max * maximum_dim

    # let the malicious clients (first f clients) perform the attack
    for i in range(f):
        random_12 = 1. + nd.random.uniform(shape=vi_shape)
        shrink = (direction * directed_dim > 0) / random_12
        grow = (direction * directed_dim < 0) * random_12
        v[i] = directed_dim * (shrink + grow)
    return v
示例#13
0
    def forward(self, x=0):
        """Return the gate value, clamped to [0, 1].

        In training mode a uniform sample is turned into logistic noise and
        added to ``self._qz_loga``; the result goes through ``sign`` when the
        temperature is exactly 0, otherwise through a temperature-scaled
        sigmoid.  Outside training the sigmoid of ``self._qz_loga`` is used.
        The value is then stretched to ``[_limit_lo, _limit_hi]`` and clipped.

        ``x`` is accepted but not used.
        """
        if not mx.autograd.is_training():
            s = nd.sigmoid(self._qz_loga.data())
        else:
            u = nd.random.uniform(0, 1)
            noisy = nd.log(u) - nd.log(1 - u) + self._qz_loga.data()
            if self._temperature == 0:
                s = nd.sign(noisy)
            else:
                s = nd.sigmoid(noisy / self._temperature)

        stretched = s * (self._limit_hi - self._limit_lo) + self._limit_lo
        clipped_low = nd.maximum(stretched, 0)
        return nd.minimum(1, clipped_low)
示例#14
0
def full_trim(epoch, v, net, f, lr, active, max_flip=1.0):
    """Full-knowledge trimmed-mean attack.

    Per coordinate, the sign of the summed updates selects the minimum
    (positive sum) or maximum (negative sum) over all updates.  While
    ``active[0] < f`` holds, each of the first ``f`` entries of ``v`` is
    replaced by that value scaled by a random factor in [1, 2).

    ``epoch``, ``net``, ``lr`` and ``max_flip`` are not used here.

    Returns:
        ``v`` itself, modified in place.
    """
    vi_shape = v[0].shape
    v_tran = nd.concat(*v, dim=1)
    maximum_dim = nd.max(v_tran, axis=1).reshape(vi_shape)
    minimum_dim = nd.min(v_tran, axis=1).reshape(vi_shape)
    direction = nd.sign(nd.sum(v_tran, axis=-1, keepdims=True))
    directed_dim = ((direction > 0) * minimum_dim +
                    (direction < 0) * maximum_dim)

    for i in range(f):
        # The random factor is drawn on every iteration, even when the
        # update below is skipped.
        random_12 = 1 + nd.random.uniform(shape=vi_shape)
        if not (active[0] < f):
            continue
        same_sign = direction * directed_dim > 0
        opposite_sign = direction * directed_dim < 0
        v[i] = directed_dim * (same_sign / random_12 +
                               opposite_sign * random_12)
    return v
示例#15
0
def full_krum(epoch, v, net, f, lr, active, max_flip=1.0):
    """Full-knowledge Krum attack.

    Returns ``v`` untouched when ``f == 0``.  Otherwise a step size is
    obtained from ``lambda_max`` / ``find_lambda`` and printed.  When it is
    positive and ``active[0] < f``, the selected coordinates of ``v[0]`` are
    overwritten with ``-direction * step / lr`` and every ``v[i]``
    (``1 <= i < f``, ``active[i] < f``) is resampled uniformly within a tiny
    band around ``v[0]``.

    Returns:
        ``v`` itself, possibly modified in place.
    """
    if f == 0:
        return v

    jitter = 0.00001 / len(v[0])
    avg_grads = nd.sum(nd.concat(*v, dim=1), axis=-1, keepdims=True)
    direction = nd.sign(avg_grads)
    # Coordinates ordered by the magnitude of the summed update.
    topk = nd.argsort(nd.abs(avg_grads).reshape(-1))
    n_flips = int(max_flip * len(v[0]))
    current_f = len(np.where(np.where(active < f)[0] < f)[0])

    lam_max = lambda_max(epoch, v, net, current_f, lr)
    lam = find_lambda(lam_max, v, direction, len(v), current_f, lr, topk,
                      max_flip)
    print(lam)

    if not (lam > 0 and active[0] < f):
        return v

    v[0][topk[-n_flips:]] = -(direction[topk[-n_flips:]] * lam) / lr
    for i in range(1, f):
        if active[i] < f:
            v[i] = mx.nd.random.uniform(v[0] - jitter, v[0] + jitter)
    return v
示例#16
0
def partial_trim(epoch, v, net, f):
    """Partial-knowledge trimmed-mean attack.

    The per-coordinate mean and standard deviation are estimated from the
    first ``f`` updates only.  Each of those ``f`` updates is then replaced by
    ``mean - std * sign(mean) * r`` with ``r`` drawn uniformly from [3, 4).

    ``epoch`` and ``net`` are not used here.

    Args:
        v: list of column-vector updates; modified in place.
        f: number of compromised workers (the first ``f`` entries of ``v``).

    Returns:
        ``v`` itself.
    """
    # With no compromised workers there is nothing to estimate or replace;
    # bail out before slicing zero columns and dividing by f.
    if f <= 0:
        return v

    vi_shape = v[0].shape

    # first compute the distribution parameters
    all_grads = nd.concat(*v, dim=1)
    adv_grads = all_grads[:, :f]
    e_mu = nd.mean(adv_grads, axis=1)  # mean
    e_sigma = nd.sqrt(
        nd.sum(nd.square(nd.subtract(adv_grads, e_mu.reshape(-1, 1))), axis=1)
        / f)  # standard deviation

    # The deviation direction does not change between workers.
    signed_sigma = nd.multiply(e_sigma, nd.sign(e_mu))

    for i in range(f):
        # apply attack to compromised worker devices with randomness
        v[i] = (e_mu - signed_sigma *
                (3. + nd.random.uniform(shape=e_sigma.shape))
                ).reshape(vi_shape)

    return v
示例#17
0
        def _shard(split, x, l_fn, r_fn):
            """Dispatch the rows of ``x`` to ``l_fn`` / ``r_fn`` by ``split``.

            Rows are ordered by ``split``.  If the smallest value is positive
            everything goes to ``r_fn``; if the largest is negative everything
            goes to ``l_fn``; otherwise the rows are cut at the index derived
            from the sign change and each part goes to its handler.
            """
            order = nd.argsort(split, axis=None)
            sorted_x = x[order, :]
            sorted_split = split[order]

            if sorted_split[0] > 0:
                r_fn(sorted_x)
                return
            if sorted_split[-1] < 0:
                l_fn(sorted_x)
                return

            # Mixed signs: find the cut position between the two groups.
            ranked = nd.argsort(sorted_split, axis=0) * nd.sign(sorted_split)
            cut = int((nd.argsort(ranked, axis=None)[0] + 1).asscalar())
            lx = nd.slice_axis(sorted_x, 0, 0, cut)
            rx = nd.slice_axis(sorted_x, 0, cut, None)

            l_fn(lx)
            r_fn(rx)
示例#18
0
    def hybrid_forward(self, F, x, weight, bias=None):
        """Run the parent convolution with a re-located, masked kernel.

        A mask keeps the three largest-magnitude taps of every kernel at 1 and
        sets the remaining taps to 0.1.  The kernel taps are gathered in the
        order given by ``self.reidx``; their magnitudes, carrying the sign of
        the original weight, are added to the original weight.  The masked
        result is forwarded to the parent ``hybrid_forward``.

        NOTE(review): the tensor ops use ``nd`` rather than ``F`` and read
        ``weight.shape`` directly.
        """
        Cout, Cin, k1, k2 = weight.shape

        # 1 for the top-3 |weight| taps per (Cout, Cin) kernel, 0.1 elsewhere.
        tap_mask = nd.topk(nd.abs(weight).reshape(Cout, Cin, -1),
                           k=3,
                           ret_typ='mask')
        tap_mask = (tap_mask == 0) * 0.1 + tap_mask
        tap_mask = tap_mask.reshape(Cout, Cin, k1, k2)
        tap_mask = tap_mask.as_in_context(x.context)

        # Gather the kernel taps in the order given by self.reidx.
        flat_weight = weight.reshape((Cout, Cin, -1))
        relocated = flat_weight[:, :, self.reidx].reshape(Cout, Cin, k1, k2)
        new_weight = (weight + nd.sign(weight) * nd.abs(relocated))

        parent = super(new_location_conv, self)
        return parent.hybrid_forward(F, x, new_weight * tap_mask, bias)
示例#19
0
def evaluate_accuracy(data_iterator, net, ctx, loss_fun, num_classes):
    """Evaluate ``net`` on ``data_iterator`` (train or test data).

    For two classes the prediction is ``(sign(output) + 1) / 2`` flattened to
    one dimension; otherwise it is the arg-max over axis 1.

    Args:
        data_iterator: iterable of ``(data, labels)`` batches.
        net: model applied to each batch.
        ctx: context the batches are moved to.
        loss_fun: callable ``loss_fun(output, labels)``.
        num_classes: number of classes; 2 selects the sign-based rule.

    Returns:
        ``(accuracy, running mean of the per-batch mean loss)``.
    """
    acc = mx.metric.Accuracy()
    loss_avg = 0.
    for i, (data, labels) in enumerate(data_iterator):
        data = data.as_in_context(ctx)
        labels = labels.as_in_context(ctx)
        output = net(data)
        loss = loss_fun(output, labels)

        if num_classes == 2:
            preds = ((nd.sign(output) + 1) / 2).reshape(-1)
        else:
            preds = nd.argmax(output, axis=1)
        acc.update(preds=preds, labels=labels)

        # Incremental mean over the batches seen so far.
        batch_loss = nd.mean(loss).asscalar()
        loss_avg = loss_avg * i / (i + 1) + batch_loss / (i + 1)

    accuracy = acc.get()[1]
    return accuracy, loss_avg
示例#20
0
    def recurse(x, node, p_tau):
        """Update ``node``'s centre/radius with ``x`` and descend the tree.

        The node statistics are blended with the statistics of ``x`` and
        written back with ``set_data``.  If the node has no decision yet, the
        matching leaf in ``tree.leaves`` is handed to ``tesselate``.  Otherwise
        the decision's ``tau`` is refreshed, the points are ordered by the
        sign of the split response and forwarded to the left and/or right
        child.

        Args:
            x: points routed to ``node`` (first axis indexes points).
            node: tree node exposing ``center``, ``radius`` and ``child``.
            p_tau: value added to a fresh exponential draw to form the new
                ``tau`` (also forwarded to ``tesselate``).
        """
        # The existing node statistics are weighted as a single sample.
        n = 1
        mean = node.center.data()
        var = (0.5 * node.radius.data())**2
        N = x.shape[0]
        x_mean = nd.mean(x, axis=0)
        x_var = (N**-1) * nd.sum((x - x_mean)**2, axis=0)
        z_mean = (n * mean + N * x_mean) / (n + N)
        # NOTE(review): this combines (mean + var) terms and subtracts z_mean
        # without squaring the means - confirm this is the intended formula.
        z_var = ((n * (mean + var) + N * (x_mean + x_var)) / (n + N)) - z_mean
        z_radius = 2 * (nd.max(z_var)**0.5)
        node.center.set_data(z_mean)
        node.radius.set_data(z_radius)
        # No decision on this node: hand the points to its leaf and stop.
        if node.child['decision'] is None:
            leaf = next(l for l in tree.leaves if l.parent['node'] == node)
            tesselate(x, leaf, p_tau)
            return

        # Refresh the split's tau from the updated radius.
        E = nd.random.exponential(z_radius**-1)
        node.child['decision'].tau.set_data(p_tau + E)
        split = node.child['decision']
        # Order the points by the sign of the split response.
        side = nd.sign(split.split(x))
        order = nd.argsort(side, axis=None)
        x = x[order, :]
        side = side[order, :]
        if side[0] > 0:
            # Smallest sign is positive: every point goes right.
            recurse(x, node.child['right'], split.tau.data())
        elif side[-1] < 0:
            # Largest sign is negative: every point goes left.
            recurse(x, node.child['left'], split.tau.data())
        else:
            # Mixed signs: locate the cut index and split the points.
            orderside = nd.argsort(side, axis=0) * side
            cutpt = nd.argsort(orderside, axis=None,
                               dtype='int32')[0].asscalar() + 1
            x_l = x[0:cutpt]
            x_r = x[cutpt:None]
            recurse(x_l, node.child['left'], split.tau.data())
            recurse(x_r, node.child['right'], split.tau.data())
示例#21
0
        def _recurse(node,
                     path=nd.zeros_like(splt[:, 0]),
                     prob=nd.ones_like(splt[:, 0]),
                     remain=nd.zeros_like(splt[:, 0])):
            """Fill the router / weight / embedding outputs for ``node``'s subtree.

            Writes into the enclosing-scope containers ``embedd``, ``router``,
            ``weight`` and ``router_mat``, reading ``splt`` and ``psep`` from
            the enclosing scope, then recurses into the left (-1) and right
            (+1) children when the node has any.

            Args:
                node: current tree node (key of ``self._structure``).
                path: accumulated routing value from the root to ``node``.
                prob: accumulated product of ``1 - psep`` along the path.
                remain: sum of the weights already assigned along the path.

            Returns:
                The tuple ``(router, router_mat, weight, embedd)``.
            """
            children = self._structure[node]

            # Column index of this node: its key in the weight layer.
            i_node = next(
                key for key, value in self._weightlayer._children.items()
                if value == node._box)
            i_node = int(i_node)

            # calculate the embedd matrix
            embedd[i_node] = node()

            # calculate the router matrix
            if (node._box._parent is not None):
                # Index of the parent's decision in the router layer.
                i = next(key
                         for key, value in self._routerlayer._children.items()
                         if value == node._box._parent._decision)
                i = int(i)
                direction = self._structure[node._box._parent][node]
                # Build a new array instead of using += so the array passed
                # in (or the shared default) is left untouched.
                path = path + splt[:, i] * direction - 1

            router[:, i_node] = path + 0.5

            # calculate the weight matrix
            if (node._box._parent is not None and children is not None):
                i_parent = next(
                    key for key, value in self._weightlayer._children.items()
                    if value == node._box._parent._box)
                i_parent = int(i_parent)
                # Out-of-place for the same reason as ``path`` above.
                prob = prob * (1 - psep[:, i_parent])

            if (children is None):
                # Node without children takes whatever weight is left.
                w = 1 - remain
            else:
                w = psep[:, i_node] * prob
                remain = remain + w

            weight[:, i_node] = w

            # calculate the partial router matrix
            path_mat = nd.zeros_like(psep)
            pie = nd.maximum(nd.sign(path + 1), 0)
            cur_node = node
            cur_path = path + 0

            # Walk from ``node`` up to the root, giving each ancestor a
            # fraction of what is left in ``pie``.
            while (1):
                i_cur_node = next(
                    key for key, value in self._weightlayer._children.items()
                    if value == cur_node._box)
                i_cur_node = int(i_cur_node)
                frac = nd.maximum(cur_path + 0.5, -0.5) + 0.5
                path_mat[:, i_cur_node] = frac * pie
                pie = pie - frac * pie

                if (cur_node._box._parent is not None):
                    cur_i = next(
                        key
                        for key, value in self._routerlayer._children.items()
                        if value == cur_node._box._parent._decision)
                    cur_i = int(cur_i)
                    cur_direction = self._structure[
                        cur_node._box._parent][cur_node]

                    # Undo this level's contribution to the path value.
                    cur_path = cur_path - (splt[:, cur_i] * cur_direction - 1)

                    cur_node = cur_node._box._parent
                else:
                    # Reached the root: store the finished matrix.
                    router_mat[:, i_node, :] = path_mat
                    break

            if (children is not None):
                left = next(key for key, value in children.items()
                            if value == -1)
                right = next(key for key, value in children.items()
                             if value == 1)

                # ``+ 0`` hands each child its own copy of the accumulators.
                _recurse(left, path + 0, prob + 0, remain + 0)
                _recurse(right, path + 0, prob + 0, remain + 0)

            return (router, router_mat, weight, embedd)
示例#22
0
        def _recurse(node,
                     path=nd.zeros_like(splt[:, 0]),
                     prob=nd.ones_like(splt[:, 0]),
                     remain=nd.zeros_like(splt[:, 0])):
            """Fill the per-node router / weight / embedding entries for a subtree.

            Results are stored per node index in the enclosing-scope
            containers ``embedd``, ``router``, ``weight`` and ``router_mat``;
            ``splt`` and ``psep`` are read from the enclosing scope.  The
            function recurses into the left (-1) and right (+1) children when
            the node has any.

            Args:
                node: current tree node (key of ``self._structure``).
                path: accumulated routing value from the root to ``node``.
                prob: accumulated product of ``1 - psep`` along the path.
                remain: sum of the weights already assigned along the path.

            Returns:
                The tuple ``(router, router_mat, weight, embedd)``.
            """
            children = self._structure[node]

            # Index of this node: its key in the weight layer.
            i_node = next(
                key for key, value in self._weightlayer._children.items()
                if value == node._box)
            i_node = int(i_node)

            # calculate the embedd matrix
            embedd[i_node] = node()

            # calculate the router matrix
            if (node._box._parent is not None):
                # Index of the parent's decision in the router layer.
                i = next(key
                         for key, value in self._routerlayer._children.items()
                         if value == node._box._parent._decision)
                i = int(i)
                direction = self._structure[node._box._parent][node]
                path = path + splt[:, i] * direction - 1

            # Stored per node index rather than as a column slice.
            router[i_node] = path + 0.5

            # prevent routing decay (disabled)
            # path = nd.minimum(0, nd.sign(path + 1))

            # calculate the weight matrix
            if (node._box._parent is not None and children is not None):
                i_parent = next(
                    key for key, value in self._weightlayer._children.items()
                    if value == node._box._parent._box)
                i_parent = int(i_parent)
                prob = prob * (1 - psep[:, i_parent])

            if (children is None):
                # Node without children takes whatever weight is left.
                w = 1 - remain
            else:
                w = psep[:, i_node] * prob
                remain = remain + w

            # Stored per node index rather than as a column slice.
            weight[i_node] = w

            # calculate the partial router matrix
            # Entries are collected per visited node index; indices that are
            # never visited are filled with zeros when the matrix is stacked.
            path_mat = {}
            pie = nd.maximum(nd.sign(path + 1), 0)
            cur_node = node
            cur_path = path + 0

            # Walk from ``node`` up to the root, giving each ancestor a
            # fraction of what is left in ``pie``.
            while (1):
                i_cur_node = next(
                    key for key, value in self._weightlayer._children.items()
                    if value == cur_node._box)
                i_cur_node = int(i_cur_node)
                frac = nd.maximum(cur_path + 0.5, -0.5) + 0.5
                path_mat[i_cur_node] = frac * pie
                pie = pie - frac * pie

                if (cur_node._box._parent is not None):
                    cur_i = next(
                        key
                        for key, value in self._routerlayer._children.items()
                        if value == cur_node._box._parent._decision)
                    cur_i = int(cur_i)
                    cur_direction = self._structure[
                        cur_node._box._parent][cur_node]
                    # Undo this level's contribution to the path value.
                    cur_path = cur_path - (splt[:, cur_i] * cur_direction - 1)
                    cur_node = cur_node._box._parent
                else:
                    # Reached the root: stack one column per weight-layer
                    # entry along the last axis.
                    n_node = len(self._weightlayer)
                    router_mat[i_node] = nd.stack(*[
                        path_mat[key]
                        if key in path_mat else nd.zeros_like(splt[:, 0])
                        for key in range(n_node)
                    ],
                                                  axis=-1)

                    break

            if (children is not None):
                left = next(key for key, value in children.items()
                            if value == -1)
                right = next(key for key, value in children.items()
                             if value == 1)

                # ``+ 0`` hands each child its own copy of the accumulators.
                _recurse(left, path + 0, prob + 0, remain + 0)
                _recurse(right, path + 0, prob + 0, remain + 0)

            return (router, router_mat, weight, embedd)
示例#23
0
def trim(epoch,
         gradients,
         net,
         lr,
         byz,
         old_direction,
         active,
         blacklist,
         susp,
         f=0,
         cmax=0,
         utrg=0.0,
         udet=0.50,
         urem=3):
    """Aggregate client gradients with suspicion-based weights and update ``net``.

    Steps:
      1. Flatten every client's gradients into one column vector and pass the
         list through the attack callback ``byz``.
      2. Score each client by how much its sign vector disagrees with
         ``old_direction`` (``flip_local``).
      3. When ``cmax > 0``, add ``reward`` to the suspicion score of all but
         the ``cmax`` highest-scoring clients and subtract ``penalty`` from
         those ``cmax`` clients (``susp`` is modified in place).
      4. Combine the client vectors with soft-max weights of ``susp`` and
         apply the result to every trainable parameter of ``net`` with step
         size ``lr``.

    ``blacklist``, ``utrg``, ``udet`` and ``urem`` are accepted but not used by
    the active code; ``flip_new`` is returned as all zeros.

    Returns:
        ``(trim_nd, direction, cmax, gfs, flip_local, flip_new)``.
        NOTE(review): ``direction`` is the sign vector of the last client
        processed in the scoring loop, not the sign of the aggregate -
        confirm that callers expect this.
    """
    # One flattened column vector per client.
    param_list = []
    for client_grads in gradients:
        columns = [g.reshape((-1, 1)) for g in client_grads]
        param_list.append(nd.concat(*columns, dim=0))
    param_list = byz(epoch, param_list, net, f, lr, active)

    n_clients = len(param_list)
    flip_local = nd.zeros(n_clients)
    flip_new = nd.zeros(n_clients)
    penalty = 1.0 - cmax / n_clients
    reward = 1.0 - penalty

    # Per-client disagreement with the previous global direction.
    old_flat = old_direction.reshape(-1)
    for i, client_vec in enumerate(param_list):
        direction = nd.sign(client_vec)
        flat = direction.reshape(-1)
        flip_local[i] = 0.5 * (mx.nd.sum(flat *
                                         (flat - old_flat))).asscalar()

    argsorted = nd.argsort(flip_local)
    if cmax > 0:
        least_flipped = argsorted[:-cmax]
        most_flipped = argsorted[-cmax:]
        susp[least_flipped] = susp[least_flipped] + reward
        susp[most_flipped] = susp[most_flipped] - penalty
    argsorted = nd.argsort(susp)

    # Soft-max of the suspicion scores gives the aggregation weights.
    exp_susp = nd.exp(susp)
    weights = exp_susp / nd.sum(exp_susp)
    matrix = nd.transpose(nd.transpose(nd.concat(*param_list, dim=1)))
    trim_nd = nd.linalg.gemm2(matrix, weights.reshape(-1, 1))

    # Disagreement of the aggregate with the previous direction.
    global_flat = nd.sign(trim_nd).reshape(-1)
    gfs = 0.5 * (mx.nd.sum(global_flat * (global_flat - old_flat))).asscalar()

    # Apply the aggregated update, slicing it parameter by parameter.
    idx = 0
    for param in net.collect_params().values():
        if param.grad_req == 'null':
            continue
        update = trim_nd[idx:(idx + param.data().size)]
        param.set_data(param.data() - lr * update.reshape(param.data().shape))
        idx += param.data().size
    return trim_nd, direction, cmax, gfs, flip_local, flip_new
示例#24
0
        cumulative_loss += nd.sum(loss).asscalar()
    print("Epoch %s, loss: %s" % (e, cumulative_loss))
    loss_sequence.append(cumulative_loss)

# plot the convergence of the estimated loss function
#%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt

# Plot the recorded per-epoch loss values on an 8x6 figure.
# NOTE(review): the y-axis is labelled 'average loss' while the values
# appended to loss_sequence above are cumulative sums - confirm the label.
plt.figure(num=None, figsize=(8, 6))
plt.plot(loss_sequence)

# Grid on both major and minor ticks, plus axis labels.
plt.grid(True, which="both")
plt.xlabel('epoch', fontsize=14)
plt.ylabel('average loss', fontsize=14)

plt.show()

# Count correct test-set predictions and report the accuracy.
num_correct = 0.0
num_total = len(Xtest)
for i, (data, label) in enumerate(test_data):
    data = data.as_in_context(model_ctx)
    label = label.as_in_context(model_ctx)
    output = net(data)
    # Map the sign of the raw output from {-1, 0, 1} to {0, 0.5, 1}.
    prediction = (nd.sign(output) + 1) / 2
    # After the first batch num_correct holds an array, hence the
    # .asscalar() calls in the report below.
    num_correct += nd.sum(prediction == label)
print("Accuracy: %0.3f (%s/%s)" %
      (num_correct.asscalar() / num_total, num_correct.asscalar(), num_total))
示例#25
0
def krum(epoch,
         gradients,
         net,
         lr,
         byz,
         old_direction,
         active,
         blacklist,
         susp,
         f=0,
         cmax=0,
         utrg=0,
         udet=0.50,
         urem=3,
         max_flip=1.0):
    """Apply one Krum-selected update chosen among the least-suspicious workers.

    Steps performed:

    1. Each worker's gradient list is flattened into a single column vector
       and the resulting list is passed through ``byz``.
    2. Every vector gets a flip score ``0.5 * sum(d * (d - old))`` where
       ``d = sign(vector)`` and ``old = old_direction``. When both contain
       only +1/-1 entries this equals the number of coordinates whose sign
       differs from ``old_direction``.
    3. The ``cmax`` workers with the highest flip score have ``penalty``
       added to their entry in ``susp``; all others have ``reward``
       subtracted (``penalty = 1 - cmax / n``, ``reward = 1 - penalty``).
    4. The ``n - cmax`` workers with the lowest ``susp`` are kept. For each
       kept vector the ``k + 1`` smallest pairwise distances are summed
       (``k = kept - 2``; the zero self-distance is included) and the vector
       with the smallest sum is selected.
    5. ``lr *`` the selected vector is subtracted from every parameter of
       ``net`` whose ``grad_req`` is not ``'null'``.

    Args:
        epoch: Forwarded to ``byz`` unchanged.
        gradients: Iterable with one entry per worker; each entry is an
            iterable of NDArrays that are flattened and concatenated.
        net: Network whose parameters are updated in place.
        lr: Step size applied to the selected vector.
        byz: Callable invoked as
            ``byz(epoch, param_list, net, f, lr, active, max_flip)``; its
            return value replaces the list of worker vectors (presumably
            the attack hook -- confirm against its definition).
        old_direction: Reference sign vector used for the flip scores.
        active: Forwarded to ``byz`` unchanged.
        blacklist: Unused by this implementation.
        susp: Per-worker suspicion scores, updated in place.
        f: Forwarded to ``byz`` unchanged.
        cmax: Number of workers treated as suspicious and excluded.
        utrg: Unused by this implementation.
        udet: Unused by this implementation.
        urem: Unused by this implementation.
        max_flip: Forwarded to ``byz`` unchanged.

    Returns:
        Tuple ``(model_selected, global_direction, cmax, gfs, flip_local,
        1.0)``: the index (into the worker list) of the selected vector, the
        sign of that vector, ``cmax`` unchanged, the flip score of the
        selected vector, the per-worker flip scores, and the constant 1.0.
    """
    param_list = [
        nd.concat(*[xx.reshape((-1, 1)) for xx in x], dim=0) for x in gradients
    ]
    param_list = byz(epoch, param_list, net, f, lr, active, max_flip)

    n_workers = len(param_list)
    penalty = 1.0 - cmax / n_workers
    reward = 1.0 - penalty
    old_flat = old_direction.reshape(-1)

    # Flip score of every worker against the previous global direction.
    flip_local = nd.zeros(n_workers)
    for i, worker_vec in enumerate(param_list):
        local_direction = nd.sign(worker_vec).reshape(-1)
        flip_local[i] = 0.5 * (mx.nd.sum(
            local_direction * (local_direction - old_flat))).asscalar()

    # Reward the low-flip workers and penalise the cmax highest ones.
    # With cmax == 0 nobody is suspicious and reward is 0, so there is
    # nothing to update; the guard also avoids the `-0` slices, where
    # `[:-0]` is empty and `[-0:]` is the whole array.
    argsorted = nd.argsort(flip_local)
    if cmax > 0:
        susp[argsorted[:-cmax]] = susp[argsorted[:-cmax]] - reward
        susp[argsorted[-cmax:]] = susp[argsorted[-cmax:]] + penalty

    # Keep the n - cmax workers with the lowest accumulated suspicion.
    argsorted = nd.argsort(susp)
    new_list = [
        param_list[int(argsorted[i].asscalar())]
        for i in range(n_workers - cmax)
    ]

    # Symmetric matrix of pairwise distances between the kept vectors.
    n_kept = len(new_list)
    k = n_kept - 2
    dist = mx.nd.zeros((n_kept, n_kept))
    for i in range(n_kept):
        for j in range(i):
            dist[i][j] = nd.norm(new_list[i] - new_list[j])
            dist[j][i] = dist[i][j]

    # Krum score: sum of the k + 1 smallest distances in each row.
    sorted_dist = mx.nd.sort(dist)
    sum_dist = mx.nd.sum(sorted_dist[:, :k + 1], axis=1)
    # argmin indexes new_list; map it back to an index into param_list.
    model_selected = argsorted[mx.nd.argmin(sum_dist).asscalar().astype(
        int)].asscalar().astype(int)

    global_direction = nd.sign(param_list[model_selected])
    global_flat = global_direction.reshape(-1)
    gfs = 0.5 * (mx.nd.sum(global_flat * (global_flat - old_flat))).asscalar()

    # Write the selected update into the trainable parameters, consuming the
    # flat vector in parameter order.
    idx = 0
    for param in net.collect_params().values():
        if param.grad_req == 'null':
            continue
        size = param.data().size
        param.set_data(param.data() - lr * param_list[model_selected][idx:(
            idx + size)].reshape(param.data().shape))
        idx += size

    # Return the direction of the selected update, i.e. the one gfs was
    # computed from, rather than the last per-worker direction.
    return model_selected, global_direction, cmax, gfs, flip_local, 1.0
示例#26
0
 def poly_kernels(self, x: NDArray, y: NDArray):
     """Return the squared dot product of ``x`` and ``y``, keeping its sign."""
     inner = nd.dot(x, y)
     squared_magnitude = nd.abs(inner)**2
     return nd.sign(inner) * squared_magnitude
示例#27
0
def median(epoch,
           gradients,
           net,
           lr,
           byz,
           old_direction,
           active,
           blacklist,
           susp,
           f=0,
           cmax=0,
           utrg=0.0,
           udet=0.50,
           urem=3):
    """Apply one coordinate-wise median update over the least-suspicious workers.

    Steps performed:

    1. Each worker's gradient list is flattened into a single column vector
       and the resulting list is passed through ``byz``.
    2. Every vector gets a flip score ``0.5 * sum(d * (d - old))`` where
       ``d = sign(vector)`` and ``old = old_direction``. When both contain
       only +1/-1 entries this equals the number of coordinates whose sign
       differs from ``old_direction``.
    3. The ``cmax`` workers with the highest flip score have ``penalty``
       added to their entry in ``susp``; all others have ``reward``
       subtracted (``penalty = 1 - cmax / n``, ``reward = 1 - penalty``).
    4. The ``n - cmax`` workers with the lowest ``susp`` are kept and the
       coordinate-wise median of their vectors is taken (mean of the two
       middle values when the count is even).
    5. ``lr *`` the median is subtracted from every parameter of ``net``
       whose ``grad_req`` is not ``'null'``.

    Args:
        epoch: Forwarded to ``byz`` unchanged.
        gradients: Iterable with one entry per worker; each entry is an
            iterable of NDArrays that are flattened and concatenated.
        net: Network whose parameters are updated in place.
        lr: Step size applied to the median vector.
        byz: Callable invoked as
            ``byz(epoch, param_list, net, f, lr, active)``; its return value
            replaces the list of worker vectors (presumably the attack hook
            -- confirm against its definition).
        old_direction: Reference sign vector used for the flip scores.
        active: Forwarded to ``byz`` unchanged.
        blacklist: Unused by this implementation.
        susp: Per-worker suspicion scores, updated in place.
        f: Forwarded to ``byz`` unchanged.
        cmax: Number of workers treated as suspicious and excluded.
        utrg: Unused by this implementation.
        udet: Unused by this implementation.
        urem: Unused by this implementation.

    Returns:
        Tuple ``(trim_nd, global_direction, cmax, gfs, flip_local)``: the
        median vector, its sign as a column vector, ``cmax`` unchanged, the
        flip score of the median, and the per-worker flip scores.
    """
    param_list = [
        nd.concat(*[xx.reshape((-1, 1)) for xx in x], dim=0) for x in gradients
    ]
    param_list = byz(epoch, param_list, net, f, lr, active)

    n_workers = len(param_list)
    penalty = 1.0 - cmax / n_workers
    reward = 1.0 - penalty
    old_flat = old_direction.reshape(-1)

    # Flip score of every worker against the previous global direction.
    flip_local = nd.zeros(n_workers)
    for i, worker_vec in enumerate(param_list):
        local_direction = nd.sign(worker_vec).reshape(-1)
        flip_local[i] = 0.5 * (mx.nd.sum(
            local_direction * (local_direction - old_flat))).asscalar()

    # Reward the low-flip workers and penalise the cmax highest ones.
    # With cmax == 0 nobody is suspicious and reward is 0, so there is
    # nothing to update; the guard also avoids the `-0` slices, where
    # `[:-0]` is empty and `[-0:]` is the whole array.
    argsorted = nd.argsort(flip_local)
    if cmax > 0:
        susp[argsorted[:-cmax]] = susp[argsorted[:-cmax]] - reward
        susp[argsorted[-cmax:]] = susp[argsorted[-cmax:]] + penalty

    # Keep the n - cmax workers with the lowest accumulated suspicion.
    argsorted = nd.argsort(susp)
    new_list = [
        param_list[int(argsorted[i].asscalar())]
        for i in range(n_workers - cmax)
    ]

    # Coordinate-wise median: sort each coordinate across the kept workers
    # and take the middle value (or the mean of the two middle values).
    n_kept = len(new_list)
    mid = n_kept // 2
    sorted_array = nd.sort(nd.concat(*new_list, dim=1), axis=-1)
    if n_kept % 2 == 1:
        trim_nd = sorted_array[:, mid]
    else:
        trim_nd = (sorted_array[:, mid - 1] + sorted_array[:, mid]) / 2

    global_direction = nd.sign(trim_nd)
    global_flat = global_direction.reshape(-1)
    gfs = 0.5 * (mx.nd.sum(global_flat * (global_flat - old_flat))).asscalar()

    # Write the median update into the trainable parameters, consuming the
    # flat vector in parameter order.
    idx = 0
    for param in net.collect_params().values():
        if param.grad_req == 'null':
            continue
        size = param.data().size
        param.set_data(
            param.data() - lr *
            trim_nd[idx:(idx + size)].reshape(param.data().shape))
        idx += size

    # Return the direction of the aggregated update, i.e. the one gfs was
    # computed from, rather than the last per-worker direction. It is
    # reshaped to a column so callers receive the same shape as the
    # per-worker sign vectors returned previously.
    return trim_nd, global_direction.reshape((-1, 1)), cmax, gfs, flip_local
        batch = traindata.makebatch(params['symbols_in_batch'])
        print(batch.shape)
        batch = nd.array(batch).as_in_context(model_ctx)
        data = batch[1]
        labels = batch[0]
        with autograd.record():
            output = net(data)
        cpuoutput = output.as_in_context(data_ctx)
        cpulabels = labels.as_in_context(data_ctx)
        alignedlabels = alignbatch(cpuoutput,
                                   cpulabels).as_in_context(model_ctx)

        with autograd.record():
            #print("#######################")
            #print(alignedlabels.asnumpy().astype(np.int32))
            mask = nd.sign(alignedlabels)  #*(1-blank_weight)+blank_weight
            mask = mask.reshape((mask.shape[0], mask.shape[1], 1))
            #print(mask.shape)
            #print(output.shape)
            loss = loss_fn(output, alignedlabels, mask)
            count += 1
            if (count % 10 == 0):
                printvalid()
        loss.backward()
        trainer.step(data.shape[0])
        if (smoothed_loss == "null"):
            smoothed_loss = nd.mean(loss).asscalar()
        else:
            smoothed_loss = smoothed_loss * smoothing + (
                1 - smoothing) * nd.mean(loss).asscalar()
        #print(nd.mean(loss).asscalar())