def normalize(input, p=2, dim=1, eps=1e-12):
    r'''
    Performs L_p normalization of inputs over the specified dimension.

    Args:
        input: input array of any shape
        p (float): the exponent value in the norm formulation. Default: 2
        dim (int): the dimension to reduce. Default: 1
        eps (float): small value to avoid division by zero. Default: 1e-12

    Example:
        >>> x = jt.random((6,3))
        >>> x
        [[0.18777736 0.9739261  0.77647036]
         [0.13710196 0.27282116 0.30533272]
         [0.7272278  0.5174613  0.9719775 ]
         [0.02566639 0.37504175 0.32676998]
         [0.0231761  0.5207773  0.70337296]
         [0.58966476 0.49547017 0.36724383]]
        >>> jt.normalize(x)
        [[0.14907198 0.7731768  0.61642134]
         [0.31750825 0.63181424 0.7071063 ]
         [0.5510936  0.39213243 0.736565  ]
         [0.05152962 0.7529597  0.656046  ]
         [0.02647221 0.59484214 0.80340654]
         [0.6910677  0.58067477 0.4303977 ]]
    '''
    # Only the L2 norm is currently implemented.
    assert p == 2, "normalize only supports p=2"
    return input / jt.maximum(input.sqr().sum(dim, True).sqrt(), eps)
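# A minimal usage sketch for normalize() above (hypothetical check, not from
# the original source; assumes only jt.random and Var.data): after L2
# normalization every row should have unit Euclidean norm.
import numpy as np
import jittor as jt

x = jt.random((6, 3))
y = normalize(x, p=2, dim=1)
assert np.allclose(y.sqr().sum(1).sqrt().data, 1.0, atol=1e-5)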
def test_segfault(self):
    a = jt.array([1.0, 2.0, 3.0])
    b = (jt.maximum(a, 0)).sum() * 2.0
    da = jt.grad(b, a)
    jt.sync_all()
    assert (a.data == [1, 2, 3]).all()
    assert (da.data == [2, 2, 2]).all()
def semantic_segmentation_loss(self, segment_data, mask_t, class_t,
                               interpolation_mode='bilinear'):
    # Note: num_classes here is without the background class, i.e. cfg.num_classes - 1
    batch_size, num_classes, mask_h, mask_w = segment_data.shape
    loss_s = 0

    for idx in range(batch_size):
        cur_segment = segment_data[idx]
        cur_class_t = class_t[idx]

        with jt.no_grad():
            downsampled_masks = nn.interpolate(
                mask_t[idx].unsqueeze(0), (mask_h, mask_w),
                mode=interpolation_mode,
                align_corners=False).squeeze(0)
            downsampled_masks = (downsampled_masks > 0.5).float()

            # Construct the semantic segmentation target
            segment_t = jt.zeros_like(cur_segment)
            segment_t.stop_grad()
            for obj_idx in range(downsampled_masks.shape[0]):
                segment_t[cur_class_t[obj_idx]] = jt.maximum(
                    segment_t[cur_class_t[obj_idx]], downsampled_masks[obj_idx])

        loss_s += nn.BCEWithLogitsLoss(size_average=False)(cur_segment, segment_t)

    return loss_s / mask_h / mask_w * cfg.semantic_segmentation_alpha
def execute(self, pred, target, weight=None):
    # pred/target rows are (left, top, right, bottom) distances to box edges
    pred_left = pred[:, 0]
    pred_top = pred[:, 1]
    pred_right = pred[:, 2]
    pred_bottom = pred[:, 3]

    target_left = target[:, 0]
    target_top = target[:, 1]
    target_right = target[:, 2]
    target_bottom = target[:, 3]

    target_area = (target_left + target_right) * (target_top + target_bottom)
    pred_area = (pred_left + pred_right) * (pred_top + pred_bottom)

    w_intersect = jt.minimum(pred_left, target_left) + \
        jt.minimum(pred_right, target_right)
    g_w_intersect = jt.maximum(pred_left, target_left) + \
        jt.maximum(pred_right, target_right)
    h_intersect = jt.minimum(pred_bottom, target_bottom) + \
        jt.minimum(pred_top, target_top)
    g_h_intersect = jt.maximum(pred_bottom, target_bottom) + \
        jt.maximum(pred_top, target_top)

    # area of the smallest enclosing box (used by the GIoU term)
    ac_union = g_w_intersect * g_h_intersect + 1e-7
    area_intersect = w_intersect * h_intersect
    area_union = target_area + pred_area - area_intersect
    ious = (area_intersect + 1.0) / (area_union + 1.0)
    gious = ious - (ac_union - area_union) / ac_union

    if self.loc_loss_type == 'iou':
        losses = -jt.log(ious)
    elif self.loc_loss_type == 'linear_iou':
        losses = 1 - ious
    elif self.loc_loss_type == 'giou':
        losses = 1 - gious
    else:
        raise NotImplementedError

    if weight is not None and weight.sum() > 0:
        return (losses * weight).sum() / weight.sum()
    else:
        assert losses.numel() != 0
        return losses.mean()
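# A minimal usage sketch (hypothetical: assumes this `execute` belongs to a
# loss module constructed like IOULoss(loc_loss_type='giou'), as in
# FCOS-style code; the class name is an assumption). Identical (l, t, r, b)
# predictions and targets give IoU = 1, so the giou loss is ~0.
import jittor as jt

loss_fn = IOULoss(loc_loss_type='giou')   # hypothetical class name
pred = jt.array([[1., 1., 1., 1.], [2., 1., 2., 1.]])
target = jt.array([[1., 1., 1., 1.], [2., 1., 2., 1.]])
assert loss_fn(pred, target).item() < 1e-5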
def execute(self, x):
    xmean = jt.mean(x, dims=[2, 3], keepdims=1)
    x2mean = jt.mean(x * x, dims=[2, 3], keepdims=1)
    if self.sync and jt.in_mpi:
        xmean = xmean.mpi_all_reduce("mean")
        x2mean = x2mean.mpi_all_reduce("mean")

    # Var[x] = E[x^2] - E[x]^2; clamp at 0 to absorb floating-point error
    xvar = jt.maximum(x2mean - xmean * xmean, 0)
    norm_x = (x - xmean) / jt.sqrt(xvar + self.eps)
    w = self.weight.broadcast(x, [0, 2, 3])
    b = self.bias.broadcast(x, [0, 2, 3])
    return norm_x * w + b
def execute(self, x):
    N, C, H, W = x.shape
    assert C == self.num_channels
    assert C % self.num_groups == 0

    x = x.reshape((N, self.num_groups, C // self.num_groups, H * W))
    xmean = jt.mean(x, dims=[2, 3], keepdims=1)
    x2mean = jt.mean(x * x, dims=[2, 3], keepdims=1)
    xvar = jt.maximum(x2mean - xmean * xmean, 0)
    norm_x = (x - xmean) / jt.sqrt(xvar + self.eps)

    w = self.weight.reshape((1, self.num_groups, C // self.num_groups, 1))
    b = self.bias.reshape((1, self.num_groups, C // self.num_groups, 1))
    return (norm_x * w + b).reshape((N, C, H, W))
def bbox_iou(bbox_a, bbox_b):
    assert bbox_a.shape[1] == 4 and bbox_b.shape[1] == 4
    # top-left and bottom-right corners of the pairwise intersections
    tl = jt.maximum(bbox_a[:, :2].unsqueeze(1), bbox_b[:, :2])
    br = jt.minimum(bbox_a[:, 2:].unsqueeze(1), bbox_b[:, 2:])

    # the (tl < br) mask zeroes out pairs that do not overlap at all
    area_i = jt.prod(br - tl, dim=2) * (tl < br).all(dim=2)
    area_a = jt.prod(bbox_a[:, 2:] - bbox_a[:, :2], dim=1)
    area_b = jt.prod(bbox_b[:, 2:] - bbox_b[:, :2], dim=1)
    return area_i / (area_a.unsqueeze(1) + area_b - area_i)
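# A minimal usage sketch for bbox_iou() (hypothetical numbers; boxes are
# (xmin, ymin, xmax, ymax)):
import jittor as jt

a = jt.array([[0., 0., 10., 10.],
              [0., 0., 4., 4.]])
b = jt.array([[0., 0., 10., 10.],
              [5., 5., 15., 15.]])
iou = bbox_iou(a, b)   # shape (2, 2)
print(iou.data)        # iou[0, 0] == 1 (identical), iou[1, 1] == 0 (disjoint)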
def partCombiner2_bg(center, eyel, eyer, nose, mouth, hair, bg, maskh, maskb,
                     comb_op=1, load_h=512, load_w=512):
    if comb_op == 0:
        # use max pooling, pad black for eyes etc.
        padvalue = -1
        hair = masked(hair, maskh)
        bg = masked(bg, maskb)
    else:
        # use min pooling, pad white for eyes etc.
        padvalue = 1
        hair = addone_with_mask(hair, maskh)
        bg = addone_with_mask(bg, maskb)

    ratio = load_h // 256
    rhs = np.array([EYE_H, EYE_H, NOSE_H, MOUTH_H]) * ratio
    rws = np.array([EYE_W, EYE_W, NOSE_W, MOUTH_W]) * ratio

    bs, nc, _, _ = eyel.shape
    eyel_p = jt.ones((bs, nc, load_h, load_w))
    eyer_p = jt.ones((bs, nc, load_h, load_w))
    nose_p = jt.ones((bs, nc, load_h, load_w))
    mouth_p = jt.ones((bs, nc, load_h, load_w))
    parts = [eyel, eyer, nose, mouth]
    parts_p = [eyel_p, eyer_p, nose_p, mouth_p]

    for i in range(bs):
        c = center[i].data  # (x, y) center of each part
        for j in range(4):
            # pad each part up to (load_h, load_w), positioned at its center
            parts_p[j][i] = jt.nn.ConstantPad2d(
                (int(c[j, 0] - rws[j] / 2), int(load_w - (c[j, 0] + rws[j] / 2)),
                 int(c[j, 1] - rhs[j] / 2), int(load_h - (c[j, 1] + rhs[j] / 2))),
                padvalue)(parts[j][i])

    if comb_op == 0:
        eyes = jt.maximum(parts_p[0], parts_p[1])
        eye_nose = jt.maximum(eyes, parts_p[2])
        eye_nose_mouth = jt.maximum(eye_nose, parts_p[3])
        eye_nose_mouth_hair = jt.maximum(hair, eye_nose_mouth)
        result = jt.maximum(bg, eye_nose_mouth_hair)
    else:
        eyes = jt.minimum(parts_p[0], parts_p[1])
        eye_nose = jt.minimum(eyes, parts_p[2])
        eye_nose_mouth = jt.minimum(eye_nose, parts_p[3])
        eye_nose_mouth_hair = jt.minimum(hair, eye_nose_mouth)
        result = jt.minimum(bg, eye_nose_mouth_hair)
    return result
def bbox2loc(src_bbox, dst_bbox):
    width = src_bbox[:, 2:3] - src_bbox[:, 0:1]
    height = src_bbox[:, 3:4] - src_bbox[:, 1:2]
    center_x = src_bbox[:, 0:1] + 0.5 * width
    center_y = src_bbox[:, 1:2] + 0.5 * height

    base_width = dst_bbox[:, 2:3] - dst_bbox[:, 0:1]
    base_height = dst_bbox[:, 3:4] - dst_bbox[:, 1:2]
    base_center_x = dst_bbox[:, 0:1] + 0.5 * base_width
    base_center_y = dst_bbox[:, 1:2] + 0.5 * base_height

    # floor width/height at eps so the divisions and logs below stay finite
    eps = 1e-5
    height = jt.maximum(height, eps)
    width = jt.maximum(width, eps)

    dy = (base_center_y - center_y) / height
    dx = (base_center_x - center_x) / width
    dw = jt.log(base_width / width)
    dh = jt.log(base_height / height)

    loc = jt.contrib.concat([dx, dy, dw, dh], dim=1)
    return loc
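# A minimal usage sketch for bbox2loc() (hypothetical numbers): when the
# source and destination boxes coincide, all encoded offsets are zero.
import numpy as np
import jittor as jt

src = jt.array([[0., 0., 10., 10.]])
dst = jt.array([[0., 0., 10., 10.]])
loc = bbox2loc(src, dst)   # columns are (dx, dy, dw, dh)
assert np.allclose(loc.data, 0.0, atol=1e-5)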
def elemwise_box_iou(box_a, box_b):
    """ Does the same as above but instead of pairwise, elementwise along the inner dimension. """
    max_xy = jt.minimum(box_a[:, 2:], box_b[:, 2:])
    min_xy = jt.maximum(box_a[:, :2], box_b[:, :2])
    inter = jt.clamp((max_xy - min_xy), min_v=0)
    inter = inter[:, 0] * inter[:, 1]

    area_a = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])
    area_b = (box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1])
    union = area_a + area_b - inter
    union = jt.clamp(union, min_v=0.1)

    # Return value is [n] for inputs [n, 4]
    return jt.clamp(inter / union, max_v=1)
def execute(self, x):
    N = x.shape[0]
    C = self.num_channels
    output_shape = (N, -1)
    # TODO: 3d group norm
    if x.ndim == 4:
        output_shape = x.shape
    assert C % self.num_groups == 0

    x = x.reshape((N, self.num_groups, C // self.num_groups, -1))
    xmean = jt.mean(x, dims=[2, 3], keepdims=1)
    x2mean = jt.mean(x * x, dims=[2, 3], keepdims=1)
    xvar = jt.maximum(x2mean - xmean * xmean, 0)
    norm_x = (x - xmean) / jt.sqrt(xvar + self.eps)

    if not self.affine:
        return norm_x.reshape(output_shape)
    w = self.weight.reshape((1, self.num_groups, C // self.num_groups, 1))
    b = self.bias.reshape((1, self.num_groups, C // self.num_groups, 1))
    return (norm_x * w + b).reshape(output_shape)
def intersect(box_a, box_b):
    """ We resize both tensors to [n,A,B,2] without new malloc:
    [n,A,2] -> [n,A,1,2] -> [n,A,B,2]
    [n,B,2] -> [n,1,B,2] -> [n,A,B,2]
    Then we compute the area of intersect between box_a and box_b.
    Args:
      box_a: (tensor) bounding boxes, Shape: [n,A,4].
      box_b: (tensor) bounding boxes, Shape: [n,B,4].
    Return:
      (tensor) intersection area, Shape: [n,A,B].
    """
    n = box_a.shape[0]
    A = box_a.shape[1]
    B = box_b.shape[1]
    max_xy = jt.minimum(box_a[:, :, 2:].unsqueeze(2).expand((n, A, B, 2)),
                        box_b[:, :, 2:].unsqueeze(1).expand((n, A, B, 2)))
    min_xy = jt.maximum(box_a[:, :, :2].unsqueeze(2).expand((n, A, B, 2)),
                        box_b[:, :, :2].unsqueeze(1).expand((n, A, B, 2)))
    return jt.clamp(max_xy - min_xy, min_v=0).prod(3)  # inter
def intersect(box_a, box_b):
    """ We resize both tensors to [A,B,2] without new malloc:
    [A,2] -> [A,1,2] -> [A,B,2]
    [B,2] -> [1,B,2] -> [A,B,2]
    Then we compute the area of intersect between box_a and box_b.
    Args:
      box_a: (tensor) bounding boxes, Shape: [A,4].
      box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
      (tensor) intersection area, Shape: [A,B].
    """
    A = box_a.size(0)
    B = box_b.size(0)
    max_xy = jt.minimum(box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
                        box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
    min_xy = jt.maximum(box_a[:, :2].unsqueeze(1).expand(A, B, 2),
                        box_b[:, :2].unsqueeze(0).expand(A, B, 2))
    inter = jt.clamp((max_xy - min_xy), min_v=0)
    return inter[:, :, 0] * inter[:, :, 1]
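# A minimal usage sketch for intersect() (hypothetical numbers): these two
# 4x4 boxes overlap on a 2x2 region, so the intersection area is 4.
import jittor as jt

box_a = jt.array([[0., 0., 4., 4.]])
box_b = jt.array([[2., 2., 6., 6.]])
inter = intersect(box_a, box_b)   # shape (1, 1)
assert abs(inter.data[0, 0] - 4.0) < 1e-5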
def integrator(raw, z_vals, rays_d, raw_noise_std=0, white_bkgd=False):
    """Transforms model's predictions to semantically meaningful values.
    Args:
        raw: [num_rays, num_samples along ray, 4]. Prediction from model.
        z_vals: [num_rays, num_samples along ray]. Integration time.
        rays_d: [num_rays, 3]. Direction of each ray.
    Returns:
        rgb_map: [num_rays, 3]. Estimated RGB color of a ray.
        disp_map: [num_rays]. Disparity map. Inverse of depth map.
        acc_map: [num_rays]. Sum of weights along each ray.
        weights: [num_rays, num_samples]. Weights assigned to each sampled color.
        depth_map: [num_rays]. Estimated distance to object.
    """
    raw2alpha = lambda raw, dists, act_fn=jt.nn.relu: 1. - jt.exp(-act_fn(raw) * dists)

    dists = z_vals[..., 1:] - z_vals[..., :-1]
    dists = jt.concat([
        dists,
        jt.array(np.array([1e10]).astype(np.float32)).expand(dists[..., :1].shape)
    ], -1)  # [N_rays, N_samples]
    dists = dists * jt.norm(rays_d.unsqueeze(-2), p=2, dim=-1)

    rgb = jt.sigmoid(raw[..., :3])  # [N_rays, N_samples, 3]
    noise = 0.
    if raw_noise_std > 0.:
        noise = jt.init.gauss(raw[..., 3].shape, raw.dtype) * raw_noise_std

    alpha = raw2alpha(raw[..., 3] + noise, dists)  # [N_rays, N_samples]
    weights = alpha * jt.cumprod(
        jt.concat([jt.ones((alpha.shape[0], 1)), 1. - alpha + 1e-10], -1), -1)[:, :-1]

    rgb_map = jt.sum(weights.unsqueeze(-1) * rgb, -2)  # [N_rays, 3]
    depth_map = jt.sum(weights * z_vals, -1)
    disp_map = 1. / jt.maximum(1e-10 * jt.ones_like(depth_map),
                               depth_map / jt.sum(weights, -1))
    acc_map = jt.sum(weights, -1)

    if white_bkgd:
        rgb_map = rgb_map + (1. - acc_map.unsqueeze(-1))
    return rgb_map, disp_map, acc_map, weights, depth_map
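# A minimal usage sketch for integrator() (hypothetical shapes: 2 rays with
# 8 samples each; assumes jt.random and jt.linspace from Jittor):
import jittor as jt

raw = jt.random((2, 8, 4))   # per-sample RGB logits + density
z_vals = jt.linspace(2.0, 6.0, 8).unsqueeze(0).expand((2, 8))
rays_d = jt.array([[0., 0., 1.], [0., 1., 0.]])
rgb_map, disp_map, acc_map, weights, depth_map = integrator(raw, z_vals, rays_d)
print(rgb_map.shape, weights.shape)   # rgb_map: [2, 3], weights: [2, 8]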
def boxlist_partly_overlap(boxlist1, boxlist2):
    """Compute which pairs of boxes partly overlap, i.e. intersect without
    either box fully containing the other.

    The box order must be (xmin, ymin, xmax, ymax).

    Arguments:
      boxlist1: (BoxList) bounding boxes, sized [N,4].
      boxlist2: (BoxList) bounding boxes, sized [M,4].

    Returns:
      (tensor) boolean mask, sized [N,M].

    Reference:
      https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py
    """
    if boxlist1.size != boxlist2.size:
        raise RuntimeError(
            "boxlists should have same image size, got {}, {}".format(
                boxlist1, boxlist2))

    N = len(boxlist1)
    M = len(boxlist2)
    area1 = boxlist1.area()
    area2 = boxlist2.area()
    box1, box2 = boxlist1.bbox, boxlist2.bbox

    lt = jt.maximum(box1[:, :2].unsqueeze(1), box2[:, :2])  # [N,M,2]
    rb = jt.minimum(box1[:, 2:].unsqueeze(1), box2[:, 2:])  # [N,M,2]

    TO_REMOVE = 1
    wh = (rb - lt + TO_REMOVE).clamp(min_v=0, max_v=999999)  # [N,M,2]
    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]

    iou = inter / (area1[:].unsqueeze(1) + area2 - inter)
    overlap = iou > 0
    # "not complete": the intersection equals neither box's own area
    not_complete_overlap = (inter - area1[:].unsqueeze(1)) * (
        inter - area2[:].unsqueeze(0)) != 0
    partly_overlap = overlap * not_complete_overlap
    return partly_overlap
def cross_entropy_loss(output, target, ignore_index=None):
    if len(output.shape) == 4:
        c_dim = output.shape[1]
        output = output.transpose((0, 2, 3, 1))
        output = output.reshape((-1, c_dim))

    if ignore_index is not None:
        # map ignored labels to -1 so they match no class below
        target = jt.ternary(target == ignore_index,
                            jt.array(-1).broadcast(target), target)
        mask = jt.logical_and(target >= 0, target < output.shape[1])

    target = target.reshape((-1, ))
    target = target.broadcast(output, [1])
    target = target.index(1) == target  # one-hot; all-zero rows for ignored labels

    output = output - output.max([1], keepdims=True)
    loss = output.exp().sum(1).log()
    loss = loss - (output * target).sum(1)

    if ignore_index is None:
        return loss.mean()
    else:
        # zero out ignored positions, then average over the valid ones only
        loss = loss * mask.reshape((-1, )).float()
        return loss.sum() / jt.maximum(mask.int().sum(), 1)
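# A minimal usage sketch for cross_entropy_loss() (hypothetical numbers):
# with ignore_index=-100 the second sample drops out of the average.
import jittor as jt

logits = jt.array([[2.0, 0.5, 0.1],
                   [0.3, 0.2, 0.1]])
labels = jt.array([0, -100])
loss = cross_entropy_loss(logits, labels, ignore_index=-100)
print(loss.item())   # cross-entropy of the first sample only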
def sanitize_coordinates(_x1, _x2, img_size: int, padding: int = 0, cast: bool = True):
    """
    Sanitizes the input coordinates so that x1 < x2, x1 != x2, x1 >= 0, and x2 <= image_size.
    Also converts from relative to absolute coordinates and casts the results to int32 tensors.

    If cast is false, the result won't be cast to ints.

    Warning: this does things in-place behind the scenes so copy if necessary.
    """
    _x1 = _x1 * img_size
    _x2 = _x2 * img_size
    if cast:
        _x1 = _x1.int32()
        _x2 = _x2.int32()
    x1 = jt.minimum(_x1, _x2)
    x2 = jt.maximum(_x1, _x2)
    x1 = jt.clamp(x1 - padding, min_v=0)
    x2 = jt.clamp(x2 + padding, max_v=img_size)
    return x1, x2
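# A minimal usage sketch for sanitize_coordinates() (hypothetical numbers):
# relative coords are scaled to the 100-pixel image, swapped so x1 < x2,
# padded by 5, and clamped into [0, 100].
import jittor as jt

_x1 = jt.array([0.8, 0.1])
_x2 = jt.array([0.2, 0.9])
x1, x2 = sanitize_coordinates(_x1, _x2, img_size=100, padding=5)
print(x1.data, x2.data)   # [15  5] [85 95]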
def box_iou(box1, box2):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Return intersection-over-union (Jaccard index) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """
    def box_area(box):
        # box = 4xn
        return (box[2] - box[0]) * (box[3] - box[1])

    area1 = box_area(box1.transpose(1, 0))
    area2 = box_area(box2.transpose(1, 0))

    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    inter = (jt.minimum(box1[:, None, 2:], box2[:, 2:]) -
             jt.maximum(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)
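# A minimal usage sketch for box_iou() (hypothetical numbers): the IoU of a
# box with itself is 1, and the result is the full N x M matrix.
import jittor as jt

b1 = jt.array([[0., 0., 10., 10.],
               [5., 5., 15., 15.],
               [20., 20., 30., 30.]])
b2 = jt.array([[0., 0., 10., 10.],
               [20., 20., 30., 30.]])
print(box_iou(b1, b2).data)   # shape (3, 2); b1[0]/b2[0] and b1[2]/b2[1] give 1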
def max_length(self):
    _, _, w, h = self._split_into_xywh()
    return jt.maximum(w, h).squeeze(1)
def relu6(x):
    return jt.minimum(jt.maximum(x, 0), 6)


class PReLU(Module):
def relu(x):
    return jt.maximum(x, f32(0))
def execute(self, x):
    if self.num_parameters != 1:
        assert self.num_parameters == x.size(1), "num_parameters does not match input channels in PReLU"
        return jt.maximum(0, x) + self.a.broadcast(x, [0, 2, 3]) * jt.minimum(0, x)
    else:
        return jt.maximum(0, x) + self.a * jt.minimum(0, x)
def relu(x):
    return jt.maximum(x, 0)


def leaky_relu(x, scale=0.01):
    return jt.ternary(x > 0, x, x * scale)
def bce_loss(output, target, size_average=True):
    if size_average:
        return -(target * jt.log(jt.maximum(output, 1e-20)) +
                 (1 - target) * jt.log(jt.maximum(1 - output, 1e-20))).mean()
    else:
        return -(target * jt.log(jt.maximum(output, 1e-20)) +
                 (1 - target) * jt.log(jt.maximum(1 - output, 1e-20))).sum()
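# A minimal usage sketch for bce_loss() (hypothetical numbers): the 1e-20
# floor inside jt.maximum keeps log() finite even for hard 0/1 outputs.
import jittor as jt

output = jt.array([1.0, 0.0, 0.9])
target = jt.array([1.0, 0.0, 1.0])
print(bce_loss(output, target).item())   # == -log(0.9) / 3, about 0.035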
def relu(x):
    return jt.maximum(x, jt.float32(0))


def resnet_fake():
def max_image_size(self):
    return jt.maximum(jt.array([self.size[0]]).float(),
                      jt.array([self.size[1]]).float())
def bce_loss(output, target):
    return -(target * jt.log(jt.maximum(output, 1e-20)) +
             (1 - target) * jt.log(jt.maximum(1 - output, 1e-20))).mean()