def get_loss(self, image_a_pred, image_b_pred, mask_a, mask_b):
        loss = 0

        # get the nonzero indices
        mask_a_indices_flat = torch.nonzero(mask_a)
        mask_b_indices_flat = torch.nonzero(mask_b)
        if len(mask_a_indices_flat) == 0:
            return Variable(torch.cuda.FloatTensor([0]), requires_grad=True)  # float tensor so autograd can track it
        if len(mask_b_indices_flat) == 0:
            return Variable(torch.cuda.FloatTensor([0]), requires_grad=True)

        # take num_samples random pixel samples of the object, using the mask
        num_samples = 10000

        rand_numbers_a = (torch.rand(num_samples)*len(mask_a_indices_flat)).cuda()
        rand_indices_a = Variable(torch.floor(rand_numbers_a).type(torch.cuda.LongTensor), requires_grad=False)
        randomized_mask_a_indices_flat = torch.index_select(mask_a_indices_flat, 0, rand_indices_a).squeeze(1)

        rand_numbers_b = (torch.rand(num_samples)*len(mask_b_indices_flat)).cuda()
        rand_indices_b = Variable(torch.floor(rand_numbers_b).type(torch.cuda.LongTensor), requires_grad=False)
        randomized_mask_b_indices_flat = torch.index_select(mask_b_indices_flat, 0, rand_indices_b).squeeze(1)

        # index into the image and get descriptors
        M_margin = 0.5 # margin parameter
        random_img_a_object_descriptors = torch.index_select(image_a_pred, 1, randomized_mask_a_indices_flat)
        random_img_b_object_descriptors = torch.index_select(image_b_pred, 1, randomized_mask_b_indices_flat)
        pixel_wise_loss = (random_img_a_object_descriptors - random_img_b_object_descriptors).pow(2).sum(dim=2)
        pixel_wise_loss = torch.add(pixel_wise_loss, -2*M_margin)
        zeros_vec = torch.zeros_like(pixel_wise_loss)
        loss += torch.max(zeros_vec, pixel_wise_loss).sum()

        return loss
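A minimal usage sketch for the loss above (an assumption: the enclosing class is not shown, and the shapes are inferred from the index_select calls, i.e. descriptor maps flattened to [1, H*W, D] and object masks flattened to [H*W]):

# Hypothetical setup; loss_fn stands in for an instance of the (unshown) enclosing class.
H, W, D = 48, 64, 3
image_a_pred = Variable(torch.rand(1, H * W, D).cuda())   # flattened descriptor map for image a
image_b_pred = Variable(torch.rand(1, H * W, D).cuda())   # flattened descriptor map for image b
mask_a = (torch.rand(H * W) > 0.5).float().cuda()         # flattened binary object mask for image a
mask_b = (torch.rand(H * W) > 0.5).float().cuda()
loss = loss_fn.get_loss(image_a_pred, image_b_pred, mask_a, mask_b)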
Example no. 2
    def __call__(self, spec_f):

        spec_f, is_variable = _check_is_variable(spec_f)
        n_fft = spec_f.size(2)

        m_min = 0. if self.f_min == 0 else 2595 * np.log10(1. + (self.f_min / 700))
        m_max = 2595 * np.log10(1. + (self.f_max / 700))

        m_pts = torch.linspace(m_min, m_max, self.n_mels + 2)
        f_pts = (700 * (10**(m_pts / 2595) - 1))

        bins = torch.floor(((n_fft - 1) * 2) * f_pts / self.sr).long()

        fb = torch.zeros(n_fft, self.n_mels)
        for m in range(1, self.n_mels + 1):
            f_m_minus = bins[m - 1].item()
            f_m = bins[m].item()
            f_m_plus = bins[m + 1].item()

            if f_m_minus != f_m:
                fb[f_m_minus:f_m, m - 1] = (torch.arange(f_m_minus, f_m) - f_m_minus) / (f_m - f_m_minus)
            if f_m != f_m_plus:
                fb[f_m:f_m_plus, m - 1] = (f_m_plus - torch.arange(f_m, f_m_plus)) / (f_m_plus - f_m)

        fb = Variable(fb)
        spec_m = torch.matmul(spec_f, fb)  # (c, l, n_fft) dot (n_fft, n_mels) -> (c, l, n_mels)
        return spec_m if is_variable else spec_m.data
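A usage sketch, assuming the (unshown) transform class stores self.sr, self.f_min, self.f_max and self.n_mels, and that the input spectrogram has shape (c, l, n_fft) as the matmul comment indicates; MelScale below is only a stand-in name for that class:

mel = MelScale(sr=16000, f_min=0., f_max=8000., n_mels=40)   # hypothetical constructor
spec_f = torch.rand(1, 100, 201)   # (c, l, n_fft) linear-frequency spectrogram
spec_m = mel(spec_f)               # (c, l, n_mels)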
Example no. 3
    def pixelcnn_generate(self, z1, z2):
        # Sampling from PixelCNN
        x_zeros = torch.zeros(
            (z1.size(0), self.args.input_size[0], self.args.input_size[1], self.args.input_size[2]))
        if self.args.cuda:
            x_zeros = x_zeros.cuda()

        for i in range(self.args.input_size[1]):
            for j in range(self.args.input_size[2]):
                samples_mean, samples_logvar = self.p_x(Variable(x_zeros, volatile=True), z1, z2)
                samples_mean = samples_mean.view(samples_mean.size(0), self.args.input_size[0], self.args.input_size[1],
                                                 self.args.input_size[2])

                if self.args.input_type == 'binary':
                    probs = samples_mean[:, :, i, j].data
                    x_zeros[:, :, i, j] = torch.bernoulli(probs).float()
                    samples_gen = samples_mean

                elif self.args.input_type == 'gray' or self.args.input_type == 'continuous':
                    binsize = 1. / 256.
                    samples_logvar = samples_logvar.view(samples_mean.size(0), self.args.input_size[0],
                                                         self.args.input_size[1], self.args.input_size[2])
                    means = samples_mean[:, :, i, j].data
                    logvar = samples_logvar[:, :, i, j].data
                    # sample from logistic distribution
                    u = torch.rand(means.size()).cuda()
                    y = torch.log(u) - torch.log(1. - u)
                    sample = means + torch.exp(logvar) * y
                    x_zeros[:, :, i, j] = torch.floor(sample / binsize) * binsize
                    samples_gen = samples_mean

        return samples_gen
Example no. 4
def random_sample_from_masked_image_torch(img_mask, num_samples):
    """

    :param img_mask: Numpy array [H,W] or torch.Tensor with shape [H,W]
    :type img_mask: numpy.ndarray or torch.Tensor
    :param num_samples: an integer
    :type num_samples: int
    :return: tuple of torch.LongTensor in (u,v) format. Each torch.LongTensor has shape
    [num_samples]
    :rtype: tuple
    """

    image_height, image_width = img_mask.shape

    if isinstance(img_mask, np.ndarray):
        img_mask_torch = torch.from_numpy(img_mask).float()
    else:
        img_mask_torch = img_mask

    # randomly subsample pixel locations from the mask
    mask = img_mask_torch.view(image_width*image_height,1).squeeze(1)
    mask_indices_flat = torch.nonzero(mask)
    if len(mask_indices_flat) == 0:
        return (None, None)

    rand_numbers = torch.rand(num_samples)*len(mask_indices_flat)
    rand_indices = torch.floor(rand_numbers).long()
    uv_vec_flattened = torch.index_select(mask_indices_flat, 0, rand_indices).squeeze(1)
    uv_vec = utils.flattened_pixel_locations_to_u_v(uv_vec_flattened, image_width)
    return uv_vec
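A short usage sketch (assuming the utils helper referenced above is importable; the mask is an arbitrary synthetic one):

# Sample 10 (u, v) pixel locations from a synthetic binary mask.
img_mask = (np.random.rand(480, 640) > 0.9).astype(np.float32)
uv = random_sample_from_masked_image_torch(img_mask, num_samples=10)
if uv[0] is not None:
    u_vec, v_vec = uv   # each a torch.LongTensor of shape [10]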
Example no. 5
    def _PyramidRoI_Feat(self, feat_maps, rois, im_info):
        ''' roi pool on pyramid feature maps'''
        # do roi pooling based on predicted rois
        img_area = im_info[0][0] * im_info[0][1]
        h = rois.data[:, 4] - rois.data[:, 2] + 1
        w = rois.data[:, 3] - rois.data[:, 1] + 1
        roi_level = torch.log(torch.sqrt(h * w) / 224.0) / np.log(2)
        roi_level = torch.floor(roi_level + 4)
        # --------
        # roi_level = torch.log(torch.sqrt(h * w) / 224.0)
        # roi_level = torch.round(roi_level + 4)
        # ------
        roi_level[roi_level < 2] = 2
        roi_level[roi_level > 5] = 5
        # roi_level.fill_(5)
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            # NOTE: need to add pyramid
            grid_xy = _affine_grid_gen(rois, feat_maps.size()[2:], self.grid_size)  ##
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            roi_pool_feat = self.RCNN_roi_crop(feat_maps, Variable(grid_yx).detach()) ##
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                roi_pool_feat = F.max_pool2d(roi_pool_feat, 2, 2)

        elif cfg.POOLING_MODE == 'align':
            roi_pool_feats = []
            box_to_levels = []
            for i, l in enumerate(range(2, 6)):
                if (roi_level == l).sum() == 0:
                    continue
                idx_l = (roi_level == l).nonzero().squeeze()
                box_to_levels.append(idx_l)
                scale = feat_maps[i].size(2) / im_info[0][0]
                feat = self.RCNN_roi_align(feat_maps[i], rois[idx_l], scale)
                roi_pool_feats.append(feat)
            roi_pool_feat = torch.cat(roi_pool_feats, 0)
            box_to_level = torch.cat(box_to_levels, 0)
            idx_sorted, order = torch.sort(box_to_level)
            roi_pool_feat = roi_pool_feat[order]

        elif cfg.POOLING_MODE == 'pool':
            roi_pool_feats = []
            box_to_levels = []
            for i, l in enumerate(range(2, 6)):
                if (roi_level == l).sum() == 0:
                    continue
                idx_l = (roi_level == l).nonzero().squeeze()
                box_to_levels.append(idx_l)
                scale = feat_maps[i].size(2) / im_info[0][0]
                feat = self.RCNN_roi_pool(feat_maps[i], rois[idx_l], scale)
                roi_pool_feats.append(feat)
            roi_pool_feat = torch.cat(roi_pool_feats, 0)
            box_to_level = torch.cat(box_to_levels, 0)
            idx_sorted, order = torch.sort(box_to_level)
            roi_pool_feat = roi_pool_feat[order]
            
        return roi_pool_feat
Example no. 6
    def _create_dummy_data(filename):
        data = torch.rand(num_examples * maxlen)
        data = 97 + torch.floor(26 * data).int()
        with open(os.path.join(data_dir, filename), 'w') as h:
            offset = 0
            for _ in range(num_examples):
                ex_len = random.randint(1, maxlen)
                ex_str = ' '.join(map(chr, data[offset:offset+ex_len]))
                print(ex_str, file=h)
                offset += ex_len
Example no. 7
def th_random_choice(a, n_samples=1, replace=True, p=None):
    """
    Parameters
    -----------
    a : 1-D array-like
        If a th.Tensor, a random sample is generated from its elements.
        If an int, the random sample is generated as if a were th.arange(0, a)
    n_samples : int, optional
        Number of samples to draw. Default is 1, in which case a
        single value is returned.
    replace : boolean, optional
        Whether the sample is with or without replacement
    p : 1-D array-like, optional
        The probabilities associated with each entry in a.
        If not given the sample assumes a uniform distribution over all
        entries in a.

    Returns
    --------
    samples : 1-D th.Tensor, shape (n_samples,)
        The generated random samples
    """
    if isinstance(a, int):
        a = th.arange(0, a)

    if p is None:
        if replace:
            idx = th.floor(th.rand(n_samples)*a.size(0)).long()
        else:
            idx = th.randperm(len(a))[:n_samples]
    else:
        if abs(1.0-sum(p)) > 1e-3:
            raise ValueError('p must sum to 1.0')
        if not replace:
            raise ValueError('replace must equal true if probabilities given')
        idx_vec = th.cat([th.zeros(round(p[i]*1000))+i for i in range(len(p))])
        idx = (th.floor(th.rand(n_samples)*999)).long()
        idx = idx_vec[idx].long()
    selection = a[idx]
    if n_samples == 1:
        selection = selection[0]
    return selection
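A couple of usage sketches in the numpy.random.choice style (th is the torch alias used above):

samples = th_random_choice(10, n_samples=5)                      # uniform, with replacement, from range(10)
a = th.Tensor([1., 2., 3.])
weighted = th_random_choice(a, n_samples=4, p=[0.1, 0.3, 0.6])   # weighted sampling with replacement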
Example no. 8
def tanh_quantize(input, bits):
    assert bits >= 1, bits
    if bits == 1:
        return torch.sign(input)
    input = torch.tanh(input) # [-1, 1]
    input_rescale = (input + 1.0) / 2 #[0, 1]
    n = math.pow(2.0, bits) - 1
    v = torch.floor(input_rescale * n + 0.5) / n
    v = 2 * v - 1 # [-1, 1]

    v = 0.5 * torch.log((1 + v) / (1 - v)) # arctanh
    return v
Example no. 9
def linear_quantize(input, sf, bits):
    assert bits >= 1, bits
    if bits == 1:
        return torch.sign(input) - 1
    delta = math.pow(2.0, -sf)
    bound = math.pow(2.0, bits-1)
    min_val = - bound
    max_val = bound - 1
    rounded = torch.floor(input / delta + 0.5)

    clipped_value = torch.clamp(rounded, min_val, max_val) * delta
    return clipped_value
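A small worked example of the fixed-point quantizer above: with sf = 2 the step size is 2**-2 = 0.25, and with bits = 4 the rounded values are clipped to the integer range [-8, 7], i.e. [-2.0, 1.75] after rescaling.

x = torch.Tensor([0.1, 0.13, -3.0, 1.9])
print(linear_quantize(x, sf=2, bits=4))   # approximately [0.00, 0.25, -2.00, 1.75]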
Example no. 10
    def __call__(self, boxlists):
        """
        Arguments:
            boxlists (list[BoxList])
        """
        # Compute level ids
        s = torch.sqrt(cat([boxlist.area() for boxlist in boxlists]))

        # Eqn.(1) in FPN paper
        target_lvls = torch.floor(self.lvl0 + torch.log2(s / self.s0 + self.eps))
        target_lvls = torch.clamp(target_lvls, min=self.k_min, max=self.k_max)
        return target_lvls.to(torch.int64) - self.k_min
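For intuition, a small sanity check with the usual FPN constants (assumed values here, since self.lvl0, self.s0, self.k_min, self.k_max and self.eps are set elsewhere): a box whose area is 224*224 stays at level 4, halving the side length drops one level, and subtracting k_min turns the clamped level into a 0-based index into the list of poolers.

lvl0, s0, k_min, k_max, eps = 4, 224, 2, 5, 1e-6    # assumed canonical FPN settings
s = torch.Tensor([224., 112., 56., 600.])           # sqrt of box areas
lvls = torch.clamp(torch.floor(lvl0 + torch.log2(s / s0 + eps)), min=k_min, max=k_max)
print(lvls - k_min)   # tensor([2., 1., 0., 3.])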
Example no. 11
def min_max_quantize(input, bits):
    assert bits >= 1, bits
    if bits == 1:
        return torch.sign(input) - 1
    min_val, max_val = input.min(), input.max()

    if isinstance(min_val, Variable):
        max_val = float(max_val.data.cpu().numpy()[0])
        min_val = float(min_val.data.cpu().numpy()[0])

    input_rescale = (input - min_val) / (max_val - min_val)

    n = math.pow(2.0, bits) - 1
    v = torch.floor(input_rescale * n + 0.5) / n

    v =  v * (max_val - min_val) + min_val
    return v
Example no. 12
def log_Logistic_256(x, mean, logvar, average=False, reduce=True, dim=None):
    bin_size = 1. / 256.

    # implementation like https://github.com/openai/iaf/blob/master/tf_utils/distributions.py#L28
    scale = torch.exp(logvar)
    x = (torch.floor(x / bin_size) * bin_size - mean) / scale
    cdf_plus = torch.sigmoid(x + bin_size/scale)
    cdf_minus = torch.sigmoid(x)

    # calculate final log-likelihood for an image
    log_logist_256 = - torch.log(cdf_plus - cdf_minus + 1.e-7)

    if reduce:
        if average:
            return torch.mean(log_logist_256, dim)
        else:
            return torch.sum(log_logist_256, dim)
    else:
        return log_logist_256
Example no. 13
def fit_positive(rows, cols, yx_min, yx_max, anchors):
    device_id = anchors.get_device() if torch.cuda.is_available() else None
    batch_size, num, _ = yx_min.size()
    num_anchors, _ = anchors.size()
    valid = torch.prod(yx_min < yx_max, -1)
    center = (yx_min + yx_max) / 2
    ij = torch.floor(center)
    i, j = torch.unbind(ij.long(), -1)
    index = i * cols + j
    anchors2 = anchors / 2
    iou_matrix = utils.iou.torch.iou_matrix((yx_min - center).view(-1, 2), (yx_max - center).view(-1, 2), -anchors2, anchors2).view(batch_size, -1, num_anchors)
    iou, index_anchor = iou_matrix.max(-1)
    _positive = []
    cells = rows * cols
    for valid, index, index_anchor in zip(torch.unbind(valid), torch.unbind(index), torch.unbind(index_anchor)):
        index, index_anchor = (t[valid] for t in (index, index_anchor))
        t = utils.ensure_device(torch.ByteTensor(cells, num_anchors).zero_(), device_id)
        t[index, index_anchor] = 1
        _positive.append(t)
    return torch.stack(_positive)
Example no. 14
    def compute_frustum_bounds(self, world_to_grid, camera_to_world):
        corner_points = camera_to_world.new(8, 4, 1).fill_(1)
        # depth min
        corner_points[0,:3,0] = self.depth_to_skeleton(0, 0, self.depth_min)
        corner_points[1,:3,0] = self.depth_to_skeleton(self.image_dims[0] - 1, 0, self.depth_min)
        corner_points[2,:3,0] = self.depth_to_skeleton(self.image_dims[0] - 1, self.image_dims[1] - 1, self.depth_min)
        corner_points[3,:3,0] = self.depth_to_skeleton(0, self.image_dims[1] - 1, self.depth_min)
        # depth max
        corner_points[4,:3,0] = self.depth_to_skeleton(0, 0, self.depth_max)
        corner_points[5,:3,0] = self.depth_to_skeleton(self.image_dims[0] - 1, 0, self.depth_max)
        corner_points[6,:3,0] = self.depth_to_skeleton(self.image_dims[0] - 1, self.image_dims[1] - 1, self.depth_max)
        corner_points[7,:3,0] = self.depth_to_skeleton(0, self.image_dims[1] - 1, self.depth_max)

        p = torch.bmm(camera_to_world.repeat(8, 1, 1), corner_points)
        pl = torch.round(torch.bmm(world_to_grid.repeat(8, 1, 1), torch.floor(p)))
        pu = torch.round(torch.bmm(world_to_grid.repeat(8, 1, 1), torch.ceil(p)))
        bbox_min0, _ = torch.min(pl[:, :3, 0], 0)
        bbox_min1, _ = torch.min(pu[:, :3, 0], 0)
        bbox_min = np.minimum(bbox_min0, bbox_min1)
        bbox_max0, _ = torch.max(pl[:, :3, 0], 0)
        bbox_max1, _ = torch.max(pu[:, :3, 0], 0) 
        bbox_max = np.maximum(bbox_max0, bbox_max1)
        return bbox_min, bbox_max
Example no. 15
def create_non_correspondences(uv_b_matches, img_b_shape, num_non_matches_per_match=100, img_b_mask=None):
    """
    Takes in pixel matches (uv_b_matches) that correspond to matches in another image, and generates non-matches by just sampling in image space.

    Optionally, the non-matches can be sampled from a mask for image b.

    Returns non-matches as pixel positions in image b.

    Please see 'coordinate_conventions.md' documentation for an explanation of pixel coordinate conventions.

    ## Note that arg uv_b_matches are the outputs of batch_find_pixel_correspondences()

    :param uv_b_matches: tuple of torch.FloatTensors, where each FloatTensor is length n, i.e.:
        (torch.FloatTensor, torch.FloatTensor)

    :param img_b_shape: tuple of (H,W) which is the shape of the image

    (optional)
    :param num_non_matches_per_match: int

    (optional)
    :param img_b_mask: torch.FloatTensor (can be cuda or not)
        - masked image, we will select from the non-zero entries
        - shape is H x W
     
    :return: tuple of torch.FloatTensors, i.e. (torch.FloatTensor, torch.FloatTensor).
        - The first element of the tuple is all "u" pixel positions, and the right element of the tuple is all "v" positions
        - Each torch.FloatTensor is of shape torch.Shape([num_matches, non_matches_per_match])
        - This shape makes it so that each row of the non-matches corresponds to the row for the match in uv_a
    """
    image_width  = img_b_shape[1]
    image_height = img_b_shape[0]

    if uv_b_matches is None:
        return None

    num_matches = len(uv_b_matches[0])

    def get_random_uv_b_non_matches():
        return pytorch_rand_select_pixel(width=image_width,height=image_height, 
            num_samples=num_matches*num_non_matches_per_match)

    if img_b_mask is not None:
        img_b_mask_flat = img_b_mask.view(-1,1).squeeze(1)
        mask_b_indices_flat = torch.nonzero(img_b_mask_flat)
        if len(mask_b_indices_flat) == 0:
            print "warning, empty mask b"
            uv_b_non_matches = get_random_uv_b_non_matches()
        else:
            num_samples = num_matches*num_non_matches_per_match
            rand_numbers_b = torch.rand(num_samples)*len(mask_b_indices_flat)
            rand_indices_b = torch.floor(rand_numbers_b).long()
            randomized_mask_b_indices_flat = torch.index_select(mask_b_indices_flat, 0, rand_indices_b).squeeze(1)
            uv_b_non_matches = (randomized_mask_b_indices_flat%image_width, randomized_mask_b_indices_flat/image_width)
    else:
        uv_b_non_matches = get_random_uv_b_non_matches()
    
    # for each in uv_a, we want non-matches
    # first just randomly sample "non_matches"
    # we will later move random samples that were too close to being matches
    uv_b_non_matches = (uv_b_non_matches[0].view(num_matches,num_non_matches_per_match), uv_b_non_matches[1].view(num_matches,num_non_matches_per_match))

    # uv_b_matches can now be used to make sure no "non_matches" are too close
    # to preserve tensor size, rather than pruning, we can perturb these in pixel space
    copied_uv_b_matches_0 = torch.t(uv_b_matches[0].repeat(num_non_matches_per_match, 1))
    copied_uv_b_matches_1 = torch.t(uv_b_matches[1].repeat(num_non_matches_per_match, 1))

    diffs_0 = copied_uv_b_matches_0 - uv_b_non_matches[0].type(dtype_float)
    diffs_1 = copied_uv_b_matches_1 - uv_b_non_matches[1].type(dtype_float)

    diffs_0_flattened = diffs_0.view(-1,1)
    diffs_1_flattened = diffs_1.view(-1,1)

    diffs_0_flattened = torch.abs(diffs_0_flattened).squeeze(1)
    diffs_1_flattened = torch.abs(diffs_1_flattened).squeeze(1)


    need_to_be_perturbed = torch.zeros_like(diffs_0_flattened)
    ones = torch.ones_like(diffs_0_flattened)  # flag value marking a non-match as too close to a match
    num_pixels_too_close = 1.0
    threshold = torch.ones_like(diffs_0_flattened)*num_pixels_too_close

    # determine which pixels are too close to being matches
    need_to_be_perturbed = where(diffs_0_flattened < threshold, ones, need_to_be_perturbed)
    need_to_be_perturbed = where(diffs_1_flattened < threshold, ones, need_to_be_perturbed)

    minimal_perturb        = num_pixels_too_close/2
    minimal_perturb_vector = (torch.rand(len(need_to_be_perturbed))*2).floor()*(minimal_perturb*2)-minimal_perturb
    std_dev = 10
    random_vector = torch.randn(len(need_to_be_perturbed))*std_dev + minimal_perturb_vector
    perturb_vector = need_to_be_perturbed*random_vector

    uv_b_non_matches_0_flat = uv_b_non_matches[0].view(-1,1).type(dtype_float).squeeze(1)
    uv_b_non_matches_1_flat = uv_b_non_matches[1].view(-1,1).type(dtype_float).squeeze(1)

    uv_b_non_matches_0_flat = uv_b_non_matches_0_flat + perturb_vector
    uv_b_non_matches_1_flat = uv_b_non_matches_1_flat + perturb_vector

    # now just need to wrap around any that went out of bounds

    # handle wrapping in width
    lower_bound = 0.0
    upper_bound = image_width*1.0 - 1
    lower_bound_vec = torch.ones_like(uv_b_non_matches_0_flat) * lower_bound
    upper_bound_vec = torch.ones_like(uv_b_non_matches_0_flat) * upper_bound

    uv_b_non_matches_0_flat = where(uv_b_non_matches_0_flat > upper_bound_vec, 
        uv_b_non_matches_0_flat - upper_bound_vec, 
        uv_b_non_matches_0_flat)

    uv_b_non_matches_0_flat = where(uv_b_non_matches_0_flat < lower_bound_vec, 
        uv_b_non_matches_0_flat + upper_bound_vec, 
        uv_b_non_matches_0_flat)

    # handle wrapping in height
    lower_bound = 0.0
    upper_bound = image_height*1.0 - 1
    lower_bound_vec = torch.ones_like(uv_b_non_matches_1_flat) * lower_bound
    upper_bound_vec = torch.ones_like(uv_b_non_matches_1_flat) * upper_bound

    uv_b_non_matches_1_flat = where(uv_b_non_matches_1_flat > upper_bound_vec, 
        uv_b_non_matches_1_flat - upper_bound_vec, 
        uv_b_non_matches_1_flat)

    uv_b_non_matches_1_flat = where(uv_b_non_matches_1_flat < lower_bound_vec, 
        uv_b_non_matches_1_flat + upper_bound_vec, 
        uv_b_non_matches_1_flat)

    return (uv_b_non_matches_0_flat.view(num_matches, num_non_matches_per_match),
        uv_b_non_matches_1_flat.view(num_matches, num_non_matches_per_match))
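A usage sketch for the function above (hedged: it relies on module-level helpers such as where, dtype_float and pytorch_rand_select_pixel from the same file, and uv_b_matches would normally come from batch_find_pixel_correspondences()):

uv_b_matches = (torch.FloatTensor([10., 200.]), torch.FloatTensor([15., 100.]))   # two matches in image b
non_matches_u, non_matches_v = create_non_correspondences(
    uv_b_matches, img_b_shape=(480, 640), num_non_matches_per_match=5)
# each returned tensor has shape [2, 5]: one row of non-match candidates per match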
Example no. 16
def mod289(x):
    return x - torch.floor(x * (1.0 / 289.0)) * 289.0
Example no. 17
def mask2d(batch_size, dim, keep_prob, device):
    mask = torch.floor(torch.rand(batch_size, dim, device=device) +
                       keep_prob) / keep_prob
    return mask
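A quick usage sketch: mask2d produces an inverted-dropout style mask whose entries are 0 or 1/keep_prob, so multiplying activations by it keeps their expected value unchanged.

mask = mask2d(batch_size=4, dim=8, keep_prob=0.8, device=torch.device("cpu"))
x = torch.rand(4, 8)
x_dropped = x * mask   # roughly 20% of entries zeroed, the rest scaled by 1.25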
Example no. 18
def fill_norm(yx_min, yx_max, anchors):
    center = (yx_min + yx_max) / 2
    ij = torch.floor(center)
    center_offset = center - ij
    size = yx_max - yx_min
    return center_offset, torch.log(size / anchors.view(1, -1, 2))
Example no. 19
def generate_vcmr_predictions_from_res(eval_res, max_prop_per_query=300, query_bsz_in_sort=1000):
    """ This function is for Video Corpus Moment Retrieval (VCMR).
    Generate prediction file which could be evaluated using standalone_eval.eval.
    Args:
        eval_res: dict(
            query_meta=query_meta_list,  # N_q * dict(), each dict is {"desc_id": int, "desc": str}
            video_meta=video_meta_list,  # N_videos * dict(), {"vid_name": str, "duration": float, "proposals": ndarray}
            video2idx=eval_dataset.video2idx,  # dict {vid_name: index}
            video_bsz_in_sort=[],  # N_videos * (N_q, N_prop)
        )
        max_prop_per_query: int or None. If None, generate a ranking for all possible moments, else keep only the top max_prop_per_query.
        query_bsz_in_sort: int, only sort a subset of queries at a time, since sorting all queries at once would be too expensive.
    return:
        list(dicts): each dict is dict(desc=str, desc_id=int, predictions=list(sublist)),
            each sublist is [vid_name (str), st (float), ed (float), score (float)], score is negative distance.
    """
    # video2idx
    video2idx = eval_res["video2idx"]

    # (N_videos, N_prop, N_q), (N_videos, N_prop)
    padded_dist, padded_mask = pad_sequences_1d([e.transpose(0, 1) for e in eval_res["query_prop_dist_vcmr"]],
                                                dtype=eval_res["query_prop_dist_vcmr"][0].dtype,
                                                device=eval_res["query_prop_dist_vcmr"][0].device)
    # fill the invalid entries with a very large value (1e10) so that torch.topk ranks them after all real proposals
    padded_dist += (padded_mask.unsqueeze(2) == 0).float() * 1e10
    n_videos, n_prop, n_q = padded_dist.shape
    print("n_videos, n_prop, n_q {}".format((n_videos, n_prop, n_q)))
    padded_dist = padded_dist.view(n_videos * n_prop, n_q).transpose(0, 1).contiguous()  # (N_q, N_video*N_prop)
    print("padded_dist, {}".format(padded_dist.shape))

    sorted_distances, sorted_indices = torch.topk(padded_dist.to(torch.device("cuda:0"), non_blocking=True),
                                                  k=min(max_prop_per_query, n_videos * n_prop),
                                                  dim=1, largest=False, sorted=True)  # (N_q, max_prop_per_query) * 2
    sorted_distances = - sorted_distances.cpu().numpy()

    # (N_q, max_prop_per_query) * 2, prop_indices: inside video indices.
    video_meta_indices = torch.floor(sorted_indices.float() / n_prop).long().cpu().numpy()
    prop_indices = torch.remainder(sorted_indices, n_prop).cpu().numpy()

    vr_res = []
    query_meta = eval_res["query_meta"]
    for i in trange(n_q, desc="[VR] Loop over queries to generate predictions"):
        row = video_meta_indices[i]
        score_row = - sorted_distances[i]
        cur_vr_predictions = []
        for j, meta_idx in enumerate(row):
            video_idx = video2idx[eval_res["video_meta"][meta_idx]["vid_name"]]
            cur_vr_predictions.append([video_idx, 0, 0, float(score_row[j])])
        cur_query_pred = dict(
            desc_id=query_meta[i]["desc_id"],
            desc=query_meta[i]["desc"],
            predictions=cur_vr_predictions
        )
        vr_res.append(cur_query_pred)

    vcmr_res = []
    logger.debug("sorted_indices {}".format(sorted_indices.shape))
    logger.debug("sorted_distances {}".format(sorted_distances.shape))
    for idx, (vm_row_indices, p_row_indices) in tqdm(enumerate(zip(video_meta_indices, prop_indices)),
                                                     desc="[VCMR] Loop over queries to generate predictions",
                                                     total=n_q):  # query
        sorted_distances_row = - sorted_distances[idx]  # converted to negative distance
        # [video_idx(int), st(float), ed(float), score(float)]
        cur_ranked_predictions = []
        for col_idx, (v_col_idx, p_col_idx) in enumerate(zip(vm_row_indices, p_row_indices)):
            cur_pred = []
            cur_pred += [video2idx[eval_res["video_meta"][v_col_idx]["vid_name"]], ]
            cur_pred += eval_res["video_meta"][v_col_idx]["proposals"][p_col_idx].tolist()
            cur_pred += [float(sorted_distances_row[col_idx])]
            cur_ranked_predictions.append(cur_pred)
        cur_query_pred = dict(
            desc_id=eval_res["query_meta"][idx]["desc_id"],
            desc=eval_res["query_meta"][idx]["desc"],
            predictions=cur_ranked_predictions
        )
        vcmr_res.append(cur_query_pred)
    return vcmr_res, vr_res
Example no. 20
def floor_ste(x: torch.Tensor) -> torch.Tensor:
    y = torch.floor(x)
    return _ste(x, y)
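The _ste helper is not shown above; one common straight-through-estimator formulation (an assumption, not necessarily this repository's definition) returns the floored value in the forward pass while letting the gradient pass through x unchanged:

def _ste(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    # forward value is y (here floor(x)); backward gradient is the identity w.r.t. x
    return x + (y - x).detach()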
Example no. 21
def regularize(f):
    f_ = (f - PI) / TWO_PI
    return TWO_PI * (f_ - torch.floor(f_) - 0.5)
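This wraps an angle in radians into [-PI, PI) by shifting it by an integer multiple of TWO_PI (assuming PI and TWO_PI are the usual constants); a quick numerical check:

theta = torch.Tensor([0., 7.0, -4.0])
print(regularize(theta))   # approximately [0.0000, 0.7168, 2.2832], i.e. 7 - 2*pi and -4 + 2*pi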
Example no. 22
    def train(self):

        if self.T - self.target_sync_T > self.args.target:
            self.sync_target_network()
            self.target_sync_T = self.T

        info = {}

        for _ in range(self.args.iters):
            self.dqn.eval()

            batch, indices, is_weights = self.replay.Sample_N(self.args.batch_size, self.args.n_step, self.args.gamma)
            columns = list(zip(*batch))

            states = Variable(torch.from_numpy(np.array(columns[0])).float().transpose_(1, 3))
            actions = Variable(torch.LongTensor(columns[1]))
            terminal_states = Variable(torch.FloatTensor(columns[5]))
            rewards = Variable(torch.FloatTensor(columns[2]))
            # Have to clip rewards for DQN
            rewards = torch.clamp(rewards, -1, 1)
            steps = Variable(torch.FloatTensor(columns[4]))
            new_states = Variable(torch.from_numpy(np.array(columns[3])).float().transpose_(1, 3))

            target_dqn_qvals = self.target_dqn(new_states).cpu()
            # Make a new variable with those values so that these are treated as constants
            target_dqn_qvals_data = Variable(target_dqn_qvals.data)

            q_value_gammas = (Variable(torch.ones(terminal_states.size()[0])) - terminal_states)
            inter = Variable(torch.ones(terminal_states.size()[0]) * self.args.gamma)
            # print(steps)
            q_value_gammas = q_value_gammas * torch.pow(inter, steps)

            values = torch.linspace(self.args.v_min, self.args.v_max, steps=self.args.atoms)
            values = Variable(values)
            values = values.view(1, 1, self.args.atoms)
            values = values.expand(self.args.batch_size, self.args.actions, self.args.atoms)
            # print(values)

            q_value_gammas = q_value_gammas.view(self.args.batch_size, 1, 1)
            q_value_gammas = q_value_gammas.expand(self.args.batch_size, self.args.actions, self.args.atoms)
            # print(q_value_gammas)
            gamma_values = q_value_gammas * values
            # print(gamma_values)
            rewards = rewards.view(self.args.batch_size, 1, 1)
            rewards = rewards.expand(self.args.batch_size, self.args.actions, self.args.atoms)
            # print(rewards)
            operator_q_values = rewards + gamma_values
            # print(operator_q_values)

            clipped_operator_q_values = torch.clamp(operator_q_values, self.args.v_min, self.args.v_max)

            delta_z = (self.args.v_max - self.args.v_min) / (self.args.atoms - 1)
            # Using the notation from the categorical paper
            b_j = (clipped_operator_q_values - self.args.v_min) / delta_z
            # print(b_j)
            lower_bounds = torch.floor(b_j)
            upper_bounds = torch.ceil(b_j)

            # Work out the max action
            atom_values = Variable(torch.linspace(self.args.v_min, self.args.v_max, steps=self.args.atoms))
            atom_values = atom_values.view(1, 1, self.args.atoms)
            atom_values = atom_values.expand(self.args.batch_size, self.args.actions, self.args.atoms)

            # Sum over the atoms dimension
            target_expected_qvalues = torch.sum(target_dqn_qvals_data * atom_values, dim=2)
            # Get the maximum actions index across the batch size
            max_actions = target_expected_qvalues.max(dim=1)[1].view(-1)

            # Project back onto the original support for the max actions
            q_value_distribution_targets = torch.zeros(self.args.batch_size, self.args.atoms)

            # Distributions for the max actions
            # print(target_dqn_qvals_data, max_actions)
            q_value_max_actions_distribs = target_dqn_qvals_data.index_select(dim=1, index=max_actions)[:,0,:]
            # print(q_value_max_actions_distribs)

            # Lower_bounds_actions
            lower_bounds_actions = lower_bounds.index_select(dim=1, index=max_actions)[:,0,:]
            upper_bounds_actions = upper_bounds.index_select(dim=1, index=max_actions)[:,0,:]
            b_j_actions = b_j.index_select(dim=1, index=max_actions)[:,0,:]

            lower_bound_values_to_add = q_value_max_actions_distribs * (upper_bounds_actions - b_j_actions)
            upper_bound_values_to_add = q_value_max_actions_distribs * (b_j_actions - lower_bounds_actions)
            # print(lower_bounds_actions)
            # print(lower_bound_values_to_add)
            # Naive looping
            for b in range(self.args.batch_size):
                for l, pj in zip(lower_bounds_actions.data.type(torch.LongTensor)[b], lower_bound_values_to_add[b].data):
                    q_value_distribution_targets[b][l] += pj
                for u, pj in zip(upper_bounds_actions.data.type(torch.LongTensor)[b], upper_bound_values_to_add[b].data):
                    q_value_distribution_targets[b][u] += pj

            self.dqn.train()
            if self.args.gpu:
                actions = actions.cuda()
                # q_value_targets = q_value_targets.cuda()
                q_value_distribution_targets = q_value_distribution_targets.cuda()
            model_predictions = self.dqn(states).index_select(1, actions.view(-1))[:,0,:]
            q_value_distribution_targets = Variable(q_value_distribution_targets)
            # print(q_value_distribution_targets)
            # print(model_predictions) 

            # Cross entropy loss
            ce_loss = -torch.sum(q_value_distribution_targets * torch.log(model_predictions), dim=1)
            ce_batch_loss = ce_loss.mean()

            info = {}

            self.log("DQN/X_Entropy_Loss", ce_batch_loss.data[0], step=self.T)

            # Update
            self.optimizer.zero_grad()
            ce_batch_loss.backward()

            # Taken from pytorch clip_grad_norm
            # Remove once the pip version it up to date with source
            gradient_norm = clip_grad_norm(self.dqn.parameters(), self.args.clip_value)
            if gradient_norm is not None:
                info["Norm"] = gradient_norm

            self.optimizer.step()

            if "States" in info:
                states_trained = info["States"]
                info["States"] = states_trained + columns[0]
            else:
                info["States"] = columns[0]

        # Pad out the states to be of size batch_size
        if len(info["States"]) < self.args.batch_size:
            old_states = info["States"]
            # repeat the first state to pad the list out to batch_size entries
            info["States"] = old_states + [old_states[0]] * (self.args.batch_size - len(old_states))

        return info
Example no. 23
def _get_random_inputs(device):
    return torch.floor(torch.rand(
        (BATCH_SIZE, 2)) * EMB_SIZE).to(dtype=torch.long, device=device)
Example no. 24
    def _interpolate(im, x, y, out_size):
        # constants
        num_batch, height, width, channels = im.shape[0], im.shape[
            1], im.shape[2], im.shape[3]
        out_height = out_size[0]
        out_width = out_size[1]
        max_y = int(height - 1)
        max_x = int(width - 1)

        # scale indices from [-1, 1] to [0, width/height]
        x = (x + 1.0) * (width - 1.0) / 2.0
        y = (y + 1.0) * (height - 1.0) / 2.0

        # do sampling
        x0 = (torch.floor(x)).int()
        x1 = x0 + 1
        y0 = (torch.floor(y)).int()
        y1 = y0 + 1

        x0_c = torch.clamp(x0, 0, max_x)
        x1_c = torch.clamp(x1, 0, max_x)
        y0_c = torch.clamp(y0, 0, max_y)
        y1_c = torch.clamp(y1, 0, max_y)

        dim2 = width
        dim1 = width * height
        base = _repeat(
            torch.arange(0, num_batch) * dim1,
            out_height * out_width).to(im.get_device())

        base_y0 = base + y0_c * dim2
        base_y1 = base + y1_c * dim2
        idx_a = base_y0 + x0_c
        idx_b = base_y1 + x0_c
        idx_c = base_y0 + x1_c
        idx_d = base_y1 + x1_c

        # use indices to lookup pixels in the flat image and restore
        # channels dim
        im_flat = im.view([-1, channels])
        im_flat = im_flat.float()

        # and finally calculate interpolated values
        x0_f = x0.float()
        x1_f = x1.float()
        y0_f = y0.float()
        y1_f = y1.float()
        wa = ((x1_f - x) * (y1_f - y)).unsqueeze(1)
        wb = ((x1_f - x) * (y - y0_f)).unsqueeze(1)
        wc = ((x - x0_f) * (y1_f - y)).unsqueeze(1)
        wd = ((x - x0_f) * (y - y0_f)).unsqueeze(1)

        zerof = torch.zeros_like(wa)
        wa = torch.where(
            (torch.eq(x0_c, x0) & torch.eq(y0_c, y0)).unsqueeze(1), wa, zerof)
        wb = torch.where(
            (torch.eq(x0_c, x0) & torch.eq(y1_c, y1)).unsqueeze(1), wb, zerof)
        wc = torch.where(
            (torch.eq(x1_c, x1) & torch.eq(y0_c, y0)).unsqueeze(1), wc, zerof)
        wd = torch.where(
            (torch.eq(x1_c, x1) & torch.eq(y1_c, y1)).unsqueeze(1), wd, zerof)

        zeros = torch.zeros(
            size=[int(num_batch) * int(height) * int(width),
                  int(channels)],
            dtype=torch.float)
        output = zeros.to(im.get_device())
        output = output.scatter_add(dim=0,
                                    index=idx_a.long().unsqueeze(1).repeat(
                                        1, channels),
                                    src=im_flat * wa)
        output = output.scatter_add(dim=0,
                                    index=idx_b.long().unsqueeze(1).repeat(
                                        1, channels),
                                    src=im_flat * wb)
        output = output.scatter_add(dim=0,
                                    index=idx_c.long().unsqueeze(1).repeat(
                                        1, channels),
                                    src=im_flat * wc)
        output = output.scatter_add(dim=0,
                                    index=idx_d.long().unsqueeze(1).repeat(
                                        1, channels),
                                    src=im_flat * wd)

        return output
Example no. 25
def pytorch_rand_select_pixel(width,height,num_samples=1):
    two_rand_numbers = torch.rand(2,num_samples)
    two_rand_numbers[0,:] = two_rand_numbers[0,:]*width
    two_rand_numbers[1,:] = two_rand_numbers[1,:]*height
    two_rand_ints    = torch.floor(two_rand_numbers).type(dtype_long)
    return (two_rand_ints[0], two_rand_ints[1])
Example no. 26
def random_int_tensor(seed, size, low=0, high=2**32, a=22695477, c=1, m=2**32):
    """ Same as random_float_tensor but integers between [low, high)
    """
    return (
        torch.floor(random_float_tensor(seed, size, a, c, m) *
                    (high - low)) + low).to(torch.int64)