Example #1
def test_jitter_synthetic(
    jitter_method, float_type, ctx=mx.Context('cpu')
) -> None:
    # Initialize problem parameters
    batch_size = 1
    prediction_length = 50
    context_length = 5
    num_samples = 3

    # Initialize test data to generate Gaussian Process from
    lb = -5
    ub = 5
    dx = (ub - lb) / (prediction_length - 1)
    x_test = nd.arange(lb, ub + dx, dx, ctx=ctx, dtype=float_type).reshape(
        -1, 1
    )
    x_test = nd.tile(x_test, reps=(batch_size, 1, 1))

    # Define the GP hyper parameters
    amplitude = nd.ones((batch_size, 1, 1), ctx=ctx, dtype=float_type)
    length_scale = math.sqrt(0.4) * nd.ones_like(amplitude)
    sigma = math.sqrt(1e-5) * nd.ones_like(amplitude)

    # Instantiate desired kernel object and compute kernel matrix
    rbf_kernel = RBFKernel(amplitude, length_scale)

    # Generate samples from 0 mean Gaussian process with RBF Kernel and plot it
    gp = GaussianProcess(
        sigma=sigma,
        kernel=rbf_kernel,
        prediction_length=prediction_length,
        context_length=context_length,
        num_samples=num_samples,
        ctx=ctx,
        float_type=float_type,
        jitter_method=jitter_method,
        sample_noise=False,  # Returns sample without noise
    )

    # Generate training set on subset of interval using the sine function
    x_train = nd.array([-4, -3, -2, -1, 1], ctx=ctx, dtype=float_type).reshape(
        context_length, 1
    )
    x_train = nd.tile(x_train, reps=(batch_size, 1, 1))
    y_train = nd.sin(x_train.squeeze(axis=2))

    # Predict exact GP using the GP predictive mean and covariance using the same fixed hyper-parameters
    samples, predictive_mean, predictive_std = gp.exact_inference(
        x_train, y_train, x_test
    )

    assert (
        np.sum(np.isnan(samples.asnumpy())) == 0
    ), 'NaNs in predictive samples!'
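For context, the `jitter_method` parameter controls how the GP implementation stabilizes the Cholesky factorization of a nearly singular kernel matrix. A minimal NumPy sketch of the iterative-jitter idea this test exercises (the function name and constants are illustrative, not GluonTS API):

import numpy as np

def jitter_cholesky(K, max_tries=5, initial_jitter=1e-10):
    # retry the factorization with a growing diagonal jitter until it succeeds
    jitter = initial_jitter
    for _ in range(max_tries):
        try:
            return np.linalg.cholesky(K + jitter * np.eye(K.shape[0]))
        except np.linalg.LinAlgError:
            jitter *= 10.0
    raise np.linalg.LinAlgError('matrix is not positive definite even with jitter')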
Example #2
def get_gen_loss(gen, disc, loss_fn, batch_size, z_dim, ctx):
    z = nd.random.randn(batch_size, z_dim, ctx=ctx)
    xhat = gen(z)
    y_pred = disc(xhat)
    y_true = nd.ones_like(y_pred)
    loss = loss_fn(y_pred, y_true)
    return loss
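A hedged usage sketch for the helper above: assuming `gen` and `disc` are Gluon blocks and the loss is the stock sigmoid BCE, one generator update step could look like this (the toy networks and hyper-parameters are illustrative):

import mxnet as mx
from mxnet import autograd, gluon, init

ctx = mx.cpu()
gen = gluon.nn.Dense(784, activation='tanh')  # toy generator: z -> flat image
disc = gluon.nn.Dense(1)                      # toy discriminator: image -> logit
gen.initialize(init.Normal(0.02), ctx=ctx)
disc.initialize(init.Normal(0.02), ctx=ctx)
trainer_g = gluon.Trainer(gen.collect_params(), 'adam', {'learning_rate': 2e-4})
loss_fn = gluon.loss.SigmoidBinaryCrossEntropyLoss()

with autograd.record():
    loss = get_gen_loss(gen, disc, loss_fn, batch_size=32, z_dim=100, ctx=ctx)
loss.backward()
trainer_g.step(32)  # updates only the generator's parameters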
Example #3
    def format_groundtruth(self, labels, XYWH):  # generate targets online from the given labels
        B, H, W, boxNum, _ = XYWH.shape
        boxMask = nd.zeros((B, H, W, boxNum, 1), ctx=XYWH.context)
        boxCls = nd.ones_like(boxMask, ctx=XYWH.context) * (-1)  # -1 marks ignored items
        boxObj = nd.zeros((B, H, W, boxNum, 1), ctx=XYWH.context)
        boxXYWH = nd.zeros((B, H, W, boxNum, 4), ctx=XYWH.context)
        for b in range(B):
            label = labels[b].asnumpy()
            validLabel = label[np.where(label[:, 1] > -0.5)[0], :]
            np.random.shuffle(validLabel)  # shuffle so ties are broken randomly
            for l in validLabel:
                cls, x0, y0, x1, y1 = l  # standard label format
                w, h = x1 - x0, y1 - y0
                indx, indy = int(x0 * W), int(y0 * H)  # unlike the paper, the top-left corner determines the cell

                ious = []
                pws, phs = [1 / 16.0, 1 / 16.0], [1 / 16.0, 2 * 1 / 16.0]  # hard-coded anchor sizes
                # comparison between anchors and the object bbox (resized to the last layer),
                # so anchors act as size estimates of the target in the last layer
                # update: this now uses ratios
                for pw, ph in zip(pws, phs):
                    intersect = np.minimum(pw, w) * np.minimum(ph, h)
                    ious.append(intersect / (pw * ph + w * h - intersect))
                bestBoxInd = int(np.argmax(ious))
                boxMask[b, indy, indx, bestBoxInd, :] = 1.0  # select the cell that estimates the object
                boxCls[b, indy, indx, bestBoxInd, :] = cls  # target class id
                boxObj[b, indy, indx, bestBoxInd, :] = 1.0  # target objectness
                tx, ty = x0 * W - indx, y0 * H - indy  # xy is the offset from the cell's top-left corner (not the image's)
                # for the sake of the loss, the targets used to be square-rooted;
                # updated: log replaces sqrt (this fails if you use log(w) instead of log(1+w))
                tw, th = np.log(1 + w), np.log(1 + h)
                boxXYWH[b, indy, indx, bestBoxInd, :] = nd.array([tx, ty, tw, th])
        return boxMask, boxCls, boxObj, boxXYWH
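The `log(1 + w)` encoding above is not cosmetic: for degenerate boxes with width or height near zero, `log(w)` diverges to -inf while `log1p(w)` stays finite. A quick check:

import numpy as np

w = np.array([0.0, 1e-6, 0.5])
print(np.log(w))    # [-inf, -13.8, -0.69]: unbounded targets destabilize the loss
print(np.log1p(w))  # [0.0, 1e-06, 0.405]: bounded and well-behaved near zero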
Example #4
def test_detach_updated_grad():
    x = nd.ones((2, 2))
    dx = nd.zeros_like(x)
    y = nd.ones_like(x)
    dy = nd.zeros_like(x)
    mark_variables([x, y], [dx, dy])
    assert x._fresh_grad == False
    assert y._fresh_grad == False

    with train_section():
        x2 = x + 2
        y2  = x2 + y
        y2.backward()
    assert (dx.asnumpy() == 1).all()
    assert x._fresh_grad == True
    assert y._fresh_grad == True

    dx[:] = 0
    x._fresh_grad = False
    y._fresh_grad = False
    assert x._fresh_grad == False
    assert y._fresh_grad == False
    with train_section():
        x2 = x + 2
        x2 = x2.detach()
        y2  = x2 + y
        y2.backward()
    assert (dx.asnumpy() == 0).all()
    assert y._fresh_grad == True
    assert x._fresh_grad == False
Example #5
def test_detach_updated_grad():
    x = nd.ones((2, 2))
    dx = nd.zeros_like(x)
    y = nd.ones_like(x)
    dy = nd.zeros_like(x)
    mark_variables([x, y], [dx, dy])
    assert x._fresh_grad == False
    assert y._fresh_grad == False

    with record():
        x2 = x + 2
        y2  = x2 + y
        y2.backward()
    assert (dx.asnumpy() == 1).all()
    assert x._fresh_grad == True
    assert y._fresh_grad == True

    dx[:] = 0
    x._fresh_grad = False
    y._fresh_grad = False
    assert x._fresh_grad == False
    assert y._fresh_grad == False
    with record():
        x2 = x + 2
        x2 = x2.detach()
        y2  = x2 + y
        y2.backward()
    assert (dx.asnumpy() == 0).all()
    assert y._fresh_grad == True
    assert x._fresh_grad == False
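The behaviour these two tests check (under the older `train_section` and the newer `record` name for the autograd scope) can be reproduced with the public Gluon API. A minimal sketch:

from mxnet import autograd, nd

x = nd.ones((2, 2))
y = nd.ones((2, 2))
x.attach_grad()
y.attach_grad()

with autograd.record():
    z = (x + 2) + y
z.backward()
print(x.grad)  # all ones: the gradient flows through x + 2

x.grad[:] = 0  # reset manually, mirroring dx[:] = 0 in the tests
with autograd.record():
    z = (x + 2).detach() + y
z.backward()
print(x.grad)  # all zeros: detach() cut the graph before x
print(y.grad)  # all ones: y's path is unaffected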
Example #6
def get_gradient(crit, real, fake, epsilon):
    mixed_images = epsilon * real + (1 - epsilon) * fake
    mixed_images.attach_grad()
    # with autograd.record():
    mixed_scores = crit(mixed_images)
    grad = autograd.grad(mixed_scores, [mixed_images], retain_graph=True, create_graph=True,
                         head_grads=nd.ones_like(mixed_scores))[0]
    return grad
Example #7
def unsorted_1d_segment_mean(input, seg_id, n_segs, dim):
    # TODO: support other dimensions
    assert dim == 0, 'MXNet only supports segment mean on first dimension'

    n_ones = nd.ones_like(seg_id).astype(input.dtype)
    w = unsorted_1d_segment_sum(n_ones, seg_id, n_segs, 0)
    w = nd.clip(w, a_min=1, a_max=np.inf)
    y = unsorted_1d_segment_sum(input, seg_id, n_segs, dim)
    y = y / w.reshape((-1, ) + (1, ) * (y.ndim - 1))
    return y
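`unsorted_1d_segment_sum` is not shown here; one simple (if memory-hungry) way to realize it along the first dimension is a one-hot matrix product. A sketch, not DGL's actual implementation:

from mxnet import nd

def unsorted_1d_segment_sum(input, seg_id, n_segs, dim):
    assert dim == 0, 'sketch only handles the first dimension'
    one_hot = nd.one_hot(seg_id, n_segs).astype(input.dtype)  # (n, n_segs)
    flat = input.reshape((input.shape[0], -1))
    out = nd.dot(one_hot, flat, transpose_a=True)  # (n_segs, prod(rest))
    return out.reshape((n_segs,) + input.shape[1:])

With that in place, `unsorted_1d_segment_mean(nd.array([1., 2., 3.]), nd.array([0, 0, 1]), 2, 0)` returns `[1.5, 3.]`.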
Example #8
    def update(self, index, weight, grad, state):
        assert (isinstance(weight, NDArray))
        assert (isinstance(grad, NDArray))
        self._update_count(index)
        lr = self._get_lr(index)
        wd = self._get_wd(index)
        t = self._index_update_count[index]

        with bulk(self._bulk):
            # preprocess grad
            grad *= self.rescale_grad
            if self.clip_gradient is not None:
                grad = clip(grad, -self.clip_gradient, self.clip_gradient)

            mean, var = state
            mean *= self.beta1
            mean += (1. - self.beta1) * grad
            var *= self.beta2
            var += (1. - self.beta2) * square(grad)

            r1 = weight.norm()
            if not self.bias_correction:
                r1 = minimum(maximum(r1, self.lower_bound), self.upper_bound)
                sqrt_var = sqrt(var)
                sqrt_var += self.epsilon
                g = mean / sqrt_var
                g += wd * weight
            else:
                # apply bias correction
                mean_hat = mean / (1. - power(self.beta1, t))
                var_hat = var / (1. - power(self.beta2, t))
                if self._eps_after_sqrt:
                    sqrt(var_hat, out=var_hat)
                    var_hat += self.epsilon
                else:
                    var_hat += self.epsilon
                    sqrt(var_hat, out=var_hat)
                mean_hat /= var_hat
                mean_hat += wd * weight
                g = mean_hat

            r2 = g.norm()

            # calculate lamb_trust_ratio
            ratio = r1 / r2
            # nan_or_zero is NaN if ratio is NaN or 0 (0/0 is NaN), otherwise 0;
            # where() treats NaN as nonzero, so invalid ratios fall back to 1
            nan_or_zero = 1 - ratio / ratio
            r = where(nan_or_zero, ones_like(ratio), ratio)
            lr *= r

            # update weight
            g *= lr
            weight[:] -= g
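The trust-ratio guard above is worth seeing in isolation: `1 - ratio / ratio` is NaN exactly when `ratio` is NaN or 0, and 0 otherwise, so `where` swaps in a trust ratio of 1 for invalid entries:

from mxnet import nd

ratio = nd.array([2.0, 0.0, float('nan')])
nan_or_zero = 1 - ratio / ratio  # [0, nan, nan]
r = nd.where(nan_or_zero, nd.ones_like(ratio), ratio)
print(r)  # [2, 1, 1]: invalid ratios are replaced by 1, valid ones kept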
Example #9
def yolo2_target(scores, output, labels, anchors, ignore_label=-1, thresh=0.5):
    """
    定义一个函数来生成yolo2训练目标
    YOLO2寻找真实目标的方法比较特殊,是在每个格点内各自比较,而不是使用全局的预设。
    这里我们使用了一个技巧:sample_weight(个体权重)矩阵, 用于损失函数内部权重的调整,
    我们也可以通过权重矩阵来控制哪些个体需要被屏蔽,这一点在目标检测中尤其重要,因为往往大多数的背景区域不需要预测检测框。

    网络预测的输出为 (32,16,16,2,5)
    而label的形式为:labels 即 ground truth(32,1,5),其中 5 包括一个class label:0,以及左上、右下两个corner相对于整张图的坐标
    模型回归的目标形式:
    """
    b, h, w, n, _ = scores.shape
    anchors = np.reshape(np.array(anchors), (-1, 2))
    """ 这里传入scores只是为了用其shape和context
    scores = nd.slice_axis(outputs, begin=1, end=2, axis=-1)
    boxes = nd.slice_axis(outputs, begin=2, end=6, axis=-1)
    gt_boxes = nd.slice_axis(labels, begin=1, end=5, axis=-1)
    """
    target_score = nd.zeros((b, h, w, n, 1), ctx=scores.context)
    target_id = nd.ones_like(target_score, ctx=scores.context) * ignore_label
    target_box = nd.zeros((b, h, w, n, 4), ctx=scores.context)
    sample_weight = nd.zeros((b, h, w, n, 1), ctx=scores.context)
    for b in range(output.shape[0]):
        # find the best match for each ground-truth
        label = labels[b].asnumpy()
        valid_label = label[np.where(label[:, 0] > -0.5)[0], :]
        # shuffle because multi gt could possibly match to one anchor, we keep the last match randomly
        np.random.shuffle(valid_label)
        for l in valid_label:
            gx, gy, gw, gh = (l[1] + l[3]) / 2, (
                l[2] + l[4]) / 2, l[3] - l[1], l[4] - l[2]
            ind_x = int(gx * w)
            ind_y = int(gy * h)
            tx = gx * w - ind_x
            ty = gy * h - ind_y
            gw = gw * w
            gh = gh * h
            # find the best match using width and height only, assuming centers are identical
            intersect = np.minimum(anchors[:, 0], gw) * np.minimum(
                anchors[:, 1], gh)
            ovps = intersect / (gw * gh + anchors[:, 0] * anchors[:, 1] -
                                intersect)
            best_match = int(np.argmax(ovps))
            target_id[b, ind_y, ind_x, best_match, :] = l[0]
            target_score[b, ind_y, ind_x, best_match, :] = 1.0
            tw = np.log(gw / anchors[best_match, 0])
            th = np.log(gh / anchors[best_match, 1])
            target_box[b, ind_y, ind_x,
                       best_match, :] = mx.nd.array([tx, ty, tw, th])
            sample_weight[b, ind_y, ind_x, best_match, :] = 1.0
            # print('ind_y', ind_y, 'ind_x', ind_x, 'best_match', best_match, 't', tx, ty, tw, th, 'ovp', ovps[best_match], 'gt', gx, gy, gw/w, gh/h, 'anchor', anchors[best_match, 0], anchors[best_match, 1])
    return target_id, target_score, target_box, sample_weight
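The sample_weight trick described in the docstring composes directly with Gluon losses, which accept a per-element weight. A hedged sketch of wiring the returned arrays into a box-regression loss (the shapes and loss choice are illustrative):

from mxnet import gluon, nd

l1 = gluon.loss.L1Loss()
box_preds = nd.zeros((32, 16, 16, 2, 4))      # stand-in for the network's box outputs
target_box = nd.zeros((32, 16, 16, 2, 4))
sample_weight = nd.zeros((32, 16, 16, 2, 1))
# sample_weight zeroes every anchor that was not matched to a ground truth,
# so background positions contribute nothing to the regression loss
box_loss = l1(box_preds, target_box, sample_weight)
print(box_loss.shape)  # (32,): one loss value per batch element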
Example #10
    def forward(self, is_train, req, in_data, out_data, aux):
        arm_cls_preds = in_data[0]
        odm_cls_target = in_data[1]
        odm_loc_target_mask = in_data[2]

        arm_cls_preds = nd.softmax(data=arm_cls_preds)
        arm_cls_preds_classes = nd.split(data=arm_cls_preds,axis=1,num_outputs=2)
        # arm_cls_preds_bg shape: (batch, h*w*num_anchors[:layers]); the negative class is index [0]
        arm_cls_preds_bg = nd.reshape(data=arm_cls_preds_classes[0], shape=(0, -1))
        prob_temp = nd.ones_like(arm_cls_preds_bg) * 0.99
        cond1 = arm_cls_preds_bg >= prob_temp  # indices with confidence > 0.99 become 1
        # print('negative cond1 ------- :', heapq.nlargest(2, arm_cls_preds_bg[0]))
        temp1 = nd.ones_like(odm_cls_target) * (-1)  ### TODO: does 0 or -1 denote background?
        # if the ARM classifies an anchor as negative with confidence > 0.99, drop it from the
        # ODM anchor labels (replace with -1): the negative becomes background
        odm_cls_target_mask = nd.where(condition=cond1, x=temp1, y=odm_cls_target)

        # apply filtering to odm_loc_target_mask
        # odm_loc_target_mask_shape: (batch, num_anchors, 4)

        arm_cls_preds_bg = nd.reshape(data=arm_cls_preds_bg, shape=(0, -1, 1))  # (batch, h*w*num_anchors[:layers], 1)
        # (batch, h*w*num_anchors[:layers], 4)
        odm_loc_target_mask = nd.reshape(data=odm_loc_target_mask, shape=(0, -1, 4))
        odm_loc_target_mask = odm_loc_target_mask[:, :, 0]  # (batch, h*w*num_anchors[:layers])
        # (batch, h*w*num_anchors[:layers], 1)
        ## take the first column of every row across the batch, collapsing the four identical
        ## labels [0 0 0 0], [1 1 1 1] into [0], [1]
        odm_loc_target_mask = nd.reshape(data=odm_loc_target_mask, shape=(0, -1, 1))
        loc_temp = nd.ones_like(odm_loc_target_mask) * 0.99
        cond2 = arm_cls_preds_bg >= loc_temp
        temp2 = nd.zeros_like(odm_loc_target_mask)  # use 0
        # if the ARM classifies an anchor as negative with confidence > 0.99, zero its ODM mask
        ## in effect, whatever the IoU-based assignment says, any anchor the ARM marks negative
        ## with confidence > 0.99 is treated as background
        odm_loc_target_bg_mask = nd.where(cond2, temp2, odm_loc_target_mask)
        odm_loc_target_bg_mask = nd.concat(*[odm_loc_target_bg_mask] * 4, dim=2)
        # restore the original layout
        odm_loc_target_bg_mask = nd.reshape(odm_loc_target_bg_mask, shape=(0, -1))

        for ind, val in enumerate([odm_cls_target_mask, odm_loc_target_bg_mask]):
            self.assign(out_data[ind], req[ind], val)
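The `shape=(0, -1)` and `shape=(0, -1, 1)` reshapes above lean on MXNet's special reshape codes: 0 copies the corresponding input dimension and -1 infers whatever remains. For example:

from mxnet import nd

x = nd.zeros((4, 3, 5, 2))
print(x.reshape((0, -1)).shape)     # (4, 30): keep the batch dim, flatten the rest
print(x.reshape((0, -1, 2)).shape)  # (4, 15, 2)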
Example #11
def get_disc_loss(gen, disc, loss_fn, X, batch_size, z_dim, ctx):
    # loss from real images
    y_pred_real = disc(X).reshape(X.shape[0], -1)
    y_true_real = nd.ones_like(y_pred_real)
    loss_real = loss_fn(y_pred_real, y_true_real)
    # loss from fake images
    z = nd.random.randn(batch_size, z_dim, 1, 1, ctx=ctx)
    xhat = gen(z).detach()
    y_pred_fake = disc(xhat).reshape(X.shape[0], -1)
    y_true_fake = nd.zeros_like(y_pred_fake)
    loss_fake = loss_fn(y_pred_fake, y_true_fake)
    # total discriminator loss
    loss = 0.5 * (loss_real + loss_fake)
    return loss
Example #12
def get_crit_loss(gen, crit, real, batch_size, z_dim, ctx):
    z = nd.random.randn(batch_size, z_dim, 1, 1, ctx=ctx)
    fake = gen(z).detach()
    y_pred_fake = crit(fake).reshape(real.shape[0], -1)
    y_pred_real = crit(real).reshape(real.shape[0], -1)
    epsilon = np.random.rand(len(real), 1, 1, 1)
    epsilon = nd.array(epsilon, ctx=ctx)
    # grad = get_gradient(crit, X, Xhat.detach(), epsilon)

    mixed_images = epsilon * real + (1 - epsilon) * fake
    mixed_images.attach_grad()
    # with autograd.record():
    mixed_scores = crit(mixed_images)
    grad = autograd.grad(mixed_scores, [mixed_images], retain_graph=True, create_graph=True,
                         head_grads=nd.ones_like(mixed_scores))[0]
    gp = gradient_penalty(grad)
    crit_loss = crit_loss_fn(y_pred_fake, y_pred_real, gp, C_LAMBDA)
    return crit_loss
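`gradient_penalty` and `crit_loss_fn` are not defined in this snippet; under the standard WGAN-GP formulation they would look roughly as follows (a sketch, not necessarily this author's exact code):

from mxnet import nd

def gradient_penalty(grad):
    # penalize deviation of each sample's gradient norm from 1
    grad = grad.reshape(grad.shape[0], -1)
    grad_norm = nd.sqrt(nd.sum(grad ** 2, axis=1) + 1e-12)
    return nd.mean((grad_norm - 1) ** 2)

def crit_loss_fn(y_pred_fake, y_pred_real, gp, c_lambda):
    # Wasserstein critic loss plus the weighted penalty
    return nd.mean(y_pred_fake) - nd.mean(y_pred_real) + c_lambda * gp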
Example #13
def yolo2_target(scores, output, labels, anchors, ignore_label=-1, thresh=0.5):
    """Generate training targets given predictions and labels."""
    b, h, w, n, _ = scores.shape
    anchors = np.reshape(np.array(anchors), (-1, 2))
    #scores = nd.slice_axis(outputs, begin=1, end=2, axis=-1)
    #boxes = nd.slice_axis(outputs, begin=2, end=6, axis=-1)
    gt_boxes = nd.slice_axis(labels, begin=1, end=5, axis=-1)
    target_score = nd.zeros((b, h, w, n, 1), ctx=scores.context)
    target_id = nd.ones_like(target_score, ctx=scores.context) * ignore_label
    target_box = nd.zeros((b, h, w, n, 4), ctx=scores.context)
    sample_weight = nd.zeros((b, h, w, n, 1), ctx=scores.context)
    for b in range(output.shape[0]):
        # find the best match for each ground-truth
        label = labels[b].asnumpy()
        valid_label = label[np.where(label[:, 0] > -0.5)[0], :]
        # shuffle because multi gt could possibly match to one anchor, we keep the last match randomly
        np.random.shuffle(valid_label)
        for l in valid_label:
            gx, gy, gw, gh = (l[1] + l[3]) / 2, (
                l[2] + l[4]) / 2, l[3] - l[1], l[4] - l[2]
            ind_x = int(gx * w)
            ind_y = int(gy * h)
            tx = gx * w - ind_x
            ty = gy * h - ind_y
            gw = gw * w
            gh = gh * h
            # find the best match using width and height only, assuming centers are identical
            intersect = np.minimum(anchors[:, 0], gw) * np.minimum(
                anchors[:, 1], gh)
            ovps = intersect / (gw * gh + anchors[:, 0] * anchors[:, 1] -
                                intersect)
            best_match = int(np.argmax(ovps))
            target_id[b, ind_y, ind_x, best_match, :] = l[0]
            target_score[b, ind_y, ind_x, best_match, :] = 1.0
            tw = np.log(gw / anchors[best_match, 0])
            th = np.log(gh / anchors[best_match, 1])
            target_box[b, ind_y, ind_x,
                       best_match, :] = mx.nd.array([tx, ty, tw, th])
            sample_weight[b, ind_y, ind_x, best_match, :] = 1.0
            # print('ind_y', ind_y, 'ind_x', ind_x, 'best_match', best_match, 't', tx, ty, tw, th, 'ovp', ovps[best_match], 'gt', gx, gy, gw/w, gh/h, 'anchor', anchors[best_match, 0], anchors[best_match, 1])
    return target_id, target_score, target_box, sample_weight
Example #14
def parse_groundtruth_for_target(labels, box_per_cell, xywh):
    B, H, W, A, _ = xywh.shape
    _, maxObjNum, _ = labels.shape
    #pdb.set_trace()
    boxMask = nd.zeros((B, H, W, A, 1), ctx=xywh.context)
    boxCls = nd.ones_like(boxMask, ctx=xywh.context) * (
        -1)  #default negative label
    boxObject = nd.zeros((B, H, W, A, 1), ctx=xywh.context)
    boxXYWH = nd.zeros((B, H, W, A, 4), ctx=xywh.context)
    for b in range(B):
        label = labels[b].asnumpy()
        validLabel = label[np.where(label[:, 1] > -0.5)[0], :]
        #pdb.set_trace()
        np.random.shuffle(validLabel)
        for l in validLabel:
            cls, x0, y0, x1, y1 = l
            w = x1 - x0
            h = y1 - y0
            #find best box for this object
            indx, indy = int(x0 * W), int(y0 * H)  #position
            pws, phs = xywh[b, indy, indx, :, -2], xywh[b, indy, indx, :, -1]
            ious = []
            pws = pws.asnumpy()
            phs = phs.asnumpy()
            pws, phs = [1, 1], [1, 1]  # NOTE: overrides the anchor sizes read from xywh above

            for pw, ph in zip(pws, phs):
                intersect = np.minimum(pw, w * W) * np.minimum(ph, h * H)
                ious.append(intersect / (pw * ph + w * h - intersect))
            #pdb.set_trace()
            bestbox = int(np.argmax(ious))
            boxMask[b, indy, indx, bestbox, :] = 1.0
            boxCls[b, indy, indx, bestbox, :] = cls
            boxObject[b, indy, indx, bestbox, :] = 1.0  # ious[bestbox]
            tx = x0 * W - indx
            ty = y0 * H - indy
            tw, th = math.sqrt(w), math.sqrt(h)  #predict sqrt(w) sqrt(h)
            #pdb.set_trace()
            boxXYWH[b, indy, indx, bestbox, :] = nd.array([tx, ty, tw, th])
    return boxMask, boxCls, boxObject, boxXYWH
Example #16
def main():
    # Initialize problem parameters
    batch_size = 1
    prediction_length = 50
    context_length = 5
    axis = [-5, 5, -3, 3]
    float_type = np.float64
    ctx = mx.Context("gpu")

    num_samples = 3
    ts_idx = 0

    # Initialize test data to generate Gaussian Process from
    lb = -5
    ub = 5
    dx = (ub - lb) / (prediction_length - 1)
    x_test = nd.arange(lb, ub + dx, dx, ctx=ctx,
                       dtype=float_type).reshape(-1, 1)
    x_test = nd.tile(x_test, reps=(batch_size, 1, 1))

    # Define the GP hyper parameters
    amplitude = nd.ones((batch_size, 1, 1), ctx=ctx, dtype=float_type)
    length_scale = math.sqrt(0.4) * nd.ones_like(amplitude)
    sigma = math.sqrt(1e-5) * nd.ones_like(amplitude)

    # Instantiate desired kernel object and compute kernel matrix
    rbf_kernel = RBFKernel(amplitude, length_scale)

    # Generate samples from 0 mean Gaussian process with RBF Kernel and plot it
    gp = GaussianProcess(
        sigma=sigma,
        kernel=rbf_kernel,
        prediction_length=prediction_length,
        context_length=context_length,
        num_samples=num_samples,
        ctx=ctx,
        float_type=float_type,
        sample_noise=False,  # Returns sample without noise
    )
    mean = nd.zeros((batch_size, prediction_length), ctx=ctx, dtype=float_type)
    covariance = rbf_kernel.kernel_matrix(x_test, x_test)
    gp.plot(x_test=x_test, samples=gp.sample(mean, covariance), ts_idx=ts_idx)

    # Generate training set on subset of interval using the sine function
    x_train = nd.array([-4, -3, -2, -1, 1], ctx=ctx,
                       dtype=float_type).reshape(context_length, 1)
    x_train = nd.tile(x_train, reps=(batch_size, 1, 1))
    y_train = nd.sin(x_train.squeeze(axis=2))

    # Predict exact GP using the GP predictive mean and covariance using the same fixed hyper-parameters
    samples, predictive_mean, predictive_std = gp.exact_inference(
        x_train, y_train, x_test)

    assert (np.sum(np.isnan(
        samples.asnumpy())) == 0), "NaNs in predictive samples!"

    gp.plot(
        x_train=x_train,
        y_train=y_train,
        x_test=x_test,
        ts_idx=ts_idx,
        mean=predictive_mean,
        std=predictive_std,
        samples=samples,
        axis=axis,
    )
Example #17
    def inference_g(self, observed_arr):
        '''
        Inference with generator.

        Args:
            observed_arr:       `mxnet.ndarray` of observed data points.
        
        Returns:
            Tuple data.
            - re-parametric data.
            - encoded data points.
            - re-encoded data points.
        '''
        generated_arr, encoded_arr, re_encoded_arr = super().inference_g(observed_arr)

        if autograd.is_recording():
            limit = self.long_term_seq_len

            seq_len = self.noise_sampler.seq_len
            self.noise_sampler.seq_len = limit
            long_term_observed_arr = self.noise_sampler.draw()

            observed_mean_arr = nd.expand_dims(nd.mean(long_term_observed_arr, axis=1), axis=1)
            sum_arr = None
            for seq in range(2, long_term_observed_arr.shape[1]):
                add_arr = nd.sum(long_term_observed_arr[:, :seq] - observed_mean_arr, axis=1)
                if sum_arr is None:
                    sum_arr = nd.expand_dims(add_arr, axis=0)
                else:
                    sum_arr = nd.concat(
                        sum_arr,
                        nd.expand_dims(add_arr, axis=0),
                        dim=0
                    )
            max_arr = nd.max(sum_arr, axis=0)
            min_arr = nd.min(sum_arr, axis=0)

            diff_arr = long_term_observed_arr - observed_mean_arr
            std_arr = nd.power(nd.mean(nd.square(diff_arr), axis=1), 1/2)
            R_S_arr = (max_arr - min_arr) / std_arr
            len_arr = nd.ones_like(R_S_arr, ctx=R_S_arr.context) * np.log(long_term_observed_arr.shape[1] / 2)
            observed_H_arr = nd.log(R_S_arr) / len_arr

            self.noise_sampler.seq_len = seq_len

            g_min_arr = nd.expand_dims(generated_arr.min(axis=1), axis=1)
            g_max_arr = nd.expand_dims(generated_arr.max(axis=1), axis=1)
            o_min_arr = nd.expand_dims(observed_arr.min(axis=1), axis=1)
            o_max_arr = nd.expand_dims(observed_arr.max(axis=1), axis=1)

            _observed_arr = generated_arr

            long_term_generated_arr = None
            for i in range(limit):
                generated_arr, _, _ = super().inference_g(_observed_arr)

                g_min_arr = nd.expand_dims(generated_arr.min(axis=1), axis=1)
                g_max_arr = nd.expand_dims(generated_arr.max(axis=1), axis=1)
                o_min_arr = nd.expand_dims(_observed_arr.min(axis=1), axis=1)
                o_max_arr = nd.expand_dims(_observed_arr.max(axis=1), axis=1)
                generated_arr = (generated_arr - g_min_arr) / (g_max_arr - g_min_arr)
                generated_arr = (o_max_arr - o_min_arr) * generated_arr
                generated_arr = o_min_arr + generated_arr

                if self.condition_sampler is not None:
                    self.condition_sampler.output_shape = generated_arr.shape
                    noise_arr = self.condition_sampler.generate()
                    generated_arr += noise_arr

                if long_term_generated_arr is None:
                    long_term_generated_arr = generated_arr
                else:
                    long_term_generated_arr = nd.concat(
                        long_term_generated_arr,
                        generated_arr,
                        dim=1
                    )

                _observed_arr = generated_arr

            generated_mean_arr = nd.expand_dims(nd.mean(long_term_generated_arr, axis=1), axis=1)
            sum_arr = None
            for seq in range(2, long_term_generated_arr.shape[1]):
                add_arr = nd.sum(long_term_generated_arr[:, :seq] - generated_mean_arr, axis=1)
                if sum_arr is None:
                    sum_arr = nd.expand_dims(add_arr, axis=0)
                else:
                    sum_arr = nd.concat(
                        sum_arr,
                        nd.expand_dims(add_arr, axis=0),
                        dim=0
                    )
            max_arr = nd.max(sum_arr, axis=0)
            min_arr = nd.min(sum_arr, axis=0)

            diff_arr = long_term_generated_arr - generated_mean_arr
            std_arr = nd.power(nd.mean(nd.square(diff_arr), axis=1), 1/2)
            R_S_arr = (max_arr - min_arr) / std_arr
            len_arr = nd.ones_like(R_S_arr, ctx=R_S_arr.context) * np.log(long_term_generated_arr.shape[1] / 2)
            generated_H_arr = nd.log(R_S_arr) / len_arr

            multi_fractal_loss = nd.abs(generated_H_arr - observed_H_arr)
            multi_fractal_loss = nd.mean(multi_fractal_loss, axis=0, exclude=True)
            multi_fractal_loss = nd.expand_dims(multi_fractal_loss, axis=-1)
            multi_fractal_loss = nd.expand_dims(multi_fractal_loss, axis=-1)

            generated_arr = generated_arr + multi_fractal_loss

        return generated_arr, encoded_arr, re_encoded_arr
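The two symmetric blocks above compute a rescaled-range (R/S) estimate of the Hurst exponent, H ≈ log(R/S) / log(n/2), for the observed and the generated long-term sequences, then penalize the gap between them. The same statistic for a single 1-D series in NumPy (a sketch for intuition, not this class's API):

import numpy as np

def hurst_rs(series):
    n = len(series)
    cumdev = np.cumsum(series - series.mean())  # cumulative deviations from the mean
    R = cumdev.max() - cumdev.min()             # range of the cumulative deviations
    S = series.std()                            # standard deviation
    return np.log(R / S) / np.log(n / 2)        # H is around 0.5 for uncorrelated noise

print(hurst_rs(np.random.randn(1000)))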
Example #18
    def forward(self,
                inputs,
                loss=None,
                training=True,
                commtype='average',
                topo='FUC'):
        assert len(inputs) == self.slots + 1

        # if self.non_local_mode:
        #     return self.forward_non_local(inputs, loss, training)
        # if self.message_embedding:
        #     return self.forward_message_embedding(inputs, loss, training)

        local_drop_vec = nd.ones_like(inputs[0])
        local_drop_vec = self.local_dropout_op(local_drop_vec)
        for i in range(self.slots):
            inputs[i] = inputs[i] * local_drop_vec
        inputs[-1] = self.global_dropout_op(inputs[-1])

        # if topo == 'FC':
        #     comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
        # elif topo == 'FUC':
        #     comm_rate = nd.zeros(shape=(self.slots + 1, self.slots + 1))
        # elif topo == 'Master':
        #     comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
        #     for i in range(self.slots):
        #         for j in range(self.slots):
        #             comm_rate[i][j] = 0

        # if self.use_comm and self.topo_learning_mode:
        #     proba = nd.sigmoid(self.topo.data())

        #     if random.random() < 1e-2:
        #         print '---------------------------------------------'
        #         print proba.asnumpy()
        #         print '---------------------------------------------'

        #     u_vec = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=(self.slots + 1, self.slots + 1))
        #     comm_rate = nd.sigmoid(10. * (
        #         nd.log(proba) - nd.log(1. - proba) +
        #         nd.log(u_vec) - nd.log(1. - u_vec)
        #     ))
        #     if loss is not None:
        #         loss.append(4e-4 * nd.sum(proba * nd.log(proba) + (1. - proba) * nd.log(1. - proba)))

        results = []
        for i in range(self.slots):
            results.append(self.local_share_trans.forward(inputs[i]))
        results.append(self.local_share_trans.forward(inputs[-1]))

        # if self.use_comm:
        #     if self.topo_learning_mode:
        #         assert self.concrete_share_rate is False
        #         for i in range(self.slots):
        #             tmp = nd.zeros_like(results[i])
        #             norm = nd.zeros_like(comm_rate[0][0])
        #             for j in range(self.slots):
        #                 if i != j:
        #                     tmp = tmp + self.local2local_share_comm(inputs[j], training=training) * comm_rate[j][i]
        #                     norm = norm + comm_rate[j][i]
        #             # results[i] = results[i] + self.global2local_comm(inputs[-1]) * comm_rate[-1][i]
        #             tmp = tmp + self.global2local_comm(inputs[-1], training=training) * comm_rate[-1][i]
        #             norm = norm + comm_rate[-1][i]
        #             if nd.sum(norm) > 1e-5:
        #                 results[i] = results[i] + tmp / norm

        #         tmp = nd.zeros_like(results[-1])
        #         norm = nd.zeros_like(comm_rate[0][0])
        #         for j in range(self.slots):
        #             tmp = tmp + self.local2global_comm(inputs[j], training=training) * comm_rate[j][-1]
        #             norm = norm + comm_rate[j][-1]
        #         if nd.sum(norm) > 1e-5:
        #             results[-1] = results[-1] + tmp / norm
        #     else:
        #         if commtype == 'average':
        #             for i in range(self.slots):
        #                 tmp = nd.zeros_like(results[i])
        #                 norm = nd.zeros_like(comm_rate[0][0])
        #                 for j in range(self.slots):
        #                     if i != j:
        #                         tmp = tmp + self.local2local_share_comm.forward(nd.concat(*[inputs[i], inputs[j]], dim=1), training=training) * comm_rate[j][i]
        #                         norm = norm + comm_rate[j][i]
        #                 # results[i] = results[i] + self.global2local_comm(inputs[-1]) * comm_rate[-1][i]
        #                 tmp = tmp + self.global2local_comm.forward(nd.concat(*[inputs[i], inputs[-1]], dim=1), training=training) * comm_rate[-1][i]
        #                 norm = norm + comm_rate[-1][i]
        #                 if nd.sum(norm) > 1e-5:
        #                     results[i] = results[i] + tmp / norm

        #             tmp = nd.zeros_like(results[-1])
        #             norm = nd.zeros_like(comm_rate[0][0])
        #             for j in range(self.slots):
        #                 tmp = tmp + self.local2global_comm.forward(nd.concat(*[inputs[j], inputs[-1]], dim=1), training=training) * comm_rate[j][-1]
        #                 norm = norm + comm_rate[j][-1]
        #             if nd.sum(norm) > 1e-5:
        #                 results[-1] = results[-1] + tmp / norm

        #         elif commtype == 'maxpooling':
        #             for i in range(self.slots):
        #                 tmp = []
        #                 for j in range(self.slots):
        #                     if j != i:
        #                         tmp.append(self.local2local_share_comm.forward(inputs[j], training=training))
        #                 tmp.append(self.global2local_comm.forward(inputs[-1], training=training))

        #                 for k in range(len(tmp)):
        #                     tmp[k] = tmp[k].reshape((tmp[k].shape[0], 1, tmp[k].shape[1]))

        #                 tmp = nd.concat(*tmp, dim=1)
        #                 maxcomm = nd.max(tmp, axis=1)
        #                 results[i] = results[i] + maxcomm

        #             tmp = []
        #             for i in range(self.slots):
        #                 tmp.append(self.local2global_comm.forward(inputs[i], training=training))
        #             for k in range(len(tmp)):
        #                 tmp[k] = tmp[k].reshape((tmp[k].shape[0], 1, tmp[k].shape[1]))

        #             tmp = nd.concat(*tmp, dim=1)
        #             maxcomm = nd.max(tmp, axis=1)
        #             results[-1] = results[-1] + maxcomm

        # if self.block_mode:
        #     assert self.local_in_units == self.local_units
        #     assert self.global_in_units == self.global_units

        #     for i in range(self.slots):
        #         results[i] = self.yz_weight_local(results[i], training=training) + inputs[i]
        #     results[-1] = self.yz_weight_global(results[-1], training=training) + inputs[-1]

        return results
Example #19
    def forward(self, inputs, loss=None, training=True, commtype='average', topo='FC'):
        assert len(inputs) == self.slots + 1

        local_drop_vec = nd.ones_like(inputs[0])
        local_drop_vec = self.local_dropout_op(local_drop_vec)
        for i in range(self.slots):
            inputs[i] = inputs[i] * local_drop_vec
        inputs[-1] = self.global_dropout_op(inputs[-1])

        if topo == 'FC':
            comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
        elif topo == 'FUC':
            comm_rate = nd.zeros(shape=(self.slots + 1, self.slots + 1))
        elif topo == 'Master':
            comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
            for i in range(self.slots):
                for j in range(self.slots):
                    comm_rate[i][j] = 0

        if self.use_comm and self.topo_learning_mode:
            proba = nd.sigmoid(self.topo.data())

            if random.random() < 1e-2:
                print('---------------------------------------------')
                print(proba.asnumpy())
                print('---------------------------------------------')

            u_vec = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=(self.slots + 1, self.slots + 1))
            comm_rate = nd.sigmoid(10. * (
                    nd.log(proba) - nd.log(1. - proba) +
                    nd.log(u_vec) - nd.log(1. - u_vec)
            ))
            if loss is not None:
                loss.append(4e-4 * nd.sum(proba * nd.log(proba) + (1. - proba) * nd.log(1. - proba)))

        results = []
        for i in range(self.slots):
            results.append(self.local_share_trans.forward(inputs[i], training=training))
        results.append(self.global_trans.forward(inputs[-1], training=training))

        if commtype == 'average':
            for i in range(self.slots):
                tmp = nd.zeros_like(results[i])
                norm = nd.zeros_like(comm_rate[0][0])
                for j in range(self.slots):
                    if i != j:
                        tmp = tmp + self.local2local_share_comm.forward(nd.concat(inputs[j], dim=1),
                                                                        training=training) * comm_rate[j][i]
                        norm = norm + comm_rate[j][i]
                # results[i] = results[i] + self.global2local_comm(inputs[-1]) * comm_rate[-1][i]
                tmp = tmp + self.global2local_comm.forward(nd.concat(inputs[-1], dim=1), training=training) * \
                      comm_rate[-1][i]
                norm = norm + comm_rate[-1][i]
                if nd.sum(norm) > 1e-5:
                    results[i] = results[i] + tmp / norm

            tmp = nd.zeros_like(results[-1])
            norm = nd.zeros_like(comm_rate[0][0])
            for j in range(self.slots):
                tmp = tmp + self.local2global_comm.forward(nd.concat(inputs[j], dim=1), training=training) * \
                      comm_rate[j][-1]
                norm = norm + comm_rate[j][-1]
            if nd.sum(norm) > 1e-5:
                results[-1] = results[-1] + tmp / norm

        elif commtype == 'maxpooling':
            for i in range(self.slots):
                tmp = []
                for j in range(self.slots):
                    if j != i:
                        tmp.append(self.local2local_share_comm.forward(inputs[j], training=training))
                tmp.append(self.global2local_comm.forward(inputs[-1], training=training))

                for k in range(len(tmp)):
                    tmp[k] = tmp[k].reshape((tmp[k].shape[0], 1, tmp[k].shape[1]))

                tmp = nd.concat(*tmp, dim=1)
                maxcomm = nd.max(tmp, axis=1)
                results[i] = results[i] + maxcomm

            tmp = []
            for i in range(self.slots):
                tmp.append(self.local2global_comm.forward(inputs[i], training=training))
            for k in range(len(tmp)):
                tmp[k] = tmp[k].reshape((tmp[k].shape[0], 1, tmp[k].shape[1]))

            tmp = nd.concat(*tmp, dim=1)
            maxcomm = nd.max(tmp, axis=1)
            results[-1] = results[-1] + maxcomm

        return results
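The `comm_rate` sampling in the topology-learning branch is a binary Concrete (Gumbel-sigmoid) relaxation: logistic noise is added to each gate's log-odds and squashed through a steep sigmoid, producing nearly binary yet differentiable connection gates. The trick in isolation:

from mxnet import nd

proba = nd.array([0.1, 0.5, 0.9])  # learned gate-open probabilities
u = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=(3,))
gate = nd.sigmoid(10. * (nd.log(proba) - nd.log(1. - proba)
                         + nd.log(u) - nd.log(1. - u)))
print(gate)  # values pushed close to 0 or 1, but still differentiable w.r.t. proba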
Example #20
    def forward(self, inputs, loss=None):
        assert len(inputs) == self.slots + 1

        if self.non_local_mode:
            return self.forward_multidims(inputs, loss)
        if self.message_embedding:
            return self.forward_message_embedding(inputs, loss)

        local_drop_vec = nd.ones_like(inputs[0])
        local_drop_vec = self.local_dropout_op(local_drop_vec)
        for i in range(self.slots):
            inputs[i] = inputs[i] * local_drop_vec
        inputs[-1] = self.global_dropout_op(inputs[-1])

        # local_share_vec = []
        # local_private_vec = []
        # if self.concrete_share_rate:
        #     raise ValueError('no share_private!!!')
        #     for i in range(self.slots):
        #         proba = nd.sigmoid(data=self.share_rate[i].data())
        #         proba = nd.broadcast_axis(data=proba, axis=(0, 1), size=inputs[0].shape)
        #         u_vec = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=inputs[0].shape, ctx=CTX)
        #         local_share_vec.append(nd.sigmoid(10. * (
        #             nd.log(proba) - nd.log(1. - proba) +
        #             nd.log(u_vec) - nd.log(1. - u_vec)
        #         )))
        #         local_private_vec.append(1. - local_share_vec[i])
        #         # print 'proba:', proba
        #         # print 'dropout_regularizer:', self.dropout_regularizer
        #         if loss is not None:
        #             loss.append(
        #                 self.dropout_regularizer * nd.sum(proba * nd.log(proba) + (1. - proba) * nd.log(1. - proba)))
        #     if random.random() < 0.01:
        #         for i in range(self.slots):
        #             proba = nd.sigmoid(data=self.share_rate[i].data())
        #             print proba.asnumpy(),
        #         print ''
        # else:
        #     local_share_vec = [nd.ones_like(inputs[0]), ] * self.slots
        #     local_private_vec = [nd.zeros_like(inputs[0]), ] * self.slots
        # local_share_vec = (1. - self.private_rate) * nd.Dropout(
        #     nd.ones(shape=(inputs[0].shape[0], self.local_units)), p=self.private_rate, mode='always')
        # local_private_vec = 1. - local_share_vec

        comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
        if self.use_comm and self.topo_learning_mode:
            proba = nd.sigmoid(self.topo.data())

            if random.random() < 1e-2:
                print('---------------------------------------------')
                print(proba.asnumpy())
                print('---------------------------------------------')

            u_vec = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=(self.slots + 1, self.slots + 1))
            comm_rate = nd.sigmoid(10. * (
                nd.log(proba) - nd.log(1. - proba) +
                nd.log(u_vec) - nd.log(1. - u_vec)
            ))
            if loss is not None:
                loss.append(4e-4 * nd.sum(proba * nd.log(proba) + (1. - proba) * nd.log(1. - proba)))

        results = []
        for i in range(self.slots):
            results.append(self.local_share_trans(inputs[i]))
        results.append(self.global_trans(inputs[-1]))

        if self.use_comm:
            if self.topo_learning_mode:
                assert self.concrete_share_rate is False
                for i in range(self.slots):
                    tmp = nd.zeros_like(results[i])
                    norm = nd.zeros_like(comm_rate[0][0])
                    for j in range(self.slots):
                        if i != j:
                            tmp = tmp + self.local2local_share_comm(inputs[j]) * comm_rate[j][i]
                            norm = norm + comm_rate[j][i]
                    # results[i] = results[i] + self.global2local_comm(inputs[-1]) * comm_rate[-1][i]
                    tmp = tmp + self.global2local_comm(inputs[-1]) * comm_rate[-1][i]
                    norm = norm + comm_rate[-1][i]
                    if nd.sum(norm) > 1e-5:
                        results[i] = results[i] + tmp / norm

                tmp = nd.zeros_like(results[-1])
                norm = nd.zeros_like(comm_rate[0][0])
                for j in range(self.slots):
                    tmp = tmp + self.local2global_comm(inputs[j]) * comm_rate[j][-1]
                    norm = norm + comm_rate[j][-1]
                if nd.sum(norm) > 1e-5:
                    results[-1] = results[-1] + tmp / norm
            else:
                for i in range(self.slots):
                    tmp = nd.zeros_like(results[i])
                    for j in range(self.slots):
                        if j != i:
                            tmp = tmp + self.local2local_share_comm(inputs[j])
                    tmp = tmp + self.global2local_comm(inputs[-1])
                    results[i] = results[i] + (tmp / float(self.slots))

                tmp = nd.zeros_like(results[-1])
                for i in range(self.slots):
                    tmp = tmp + self.local2global_comm(inputs[i])
                results[-1] = results[-1] + (tmp / float(self.slots))

        if self.block_mode:
            assert self.local_in_units == self.local_units
            assert self.global_in_units == self.global_units

            for i in range(self.slots):
                results[i] = self.yz_weight_local(results[i]) + inputs[i]
            results[-1] = self.yz_weight_global(results[-1]) + inputs[-1]

        return results