Example #1
    def __init__(self, D=4, R=6, C=32):
        super(Colorizer, self).__init__()
        self.D = D
        self.R = R  # window size
        self.C = C

        self.P = self.R * 2 + 1
        self.N = self.P * self.P
        self.count = 0

        self.memory_patch_R = 12
        self.memory_patch_P = self.memory_patch_R * 2 + 1
        self.memory_patch_N = self.memory_patch_P * self.memory_patch_P

        self.correlation_sampler_dilated = [
            SpatialCorrelationSampler(kernel_size=1,
                                      patch_size=self.memory_patch_P,
                                      stride=1,
                                      padding=0,
                                      dilation=1,
                                      dilation_patch=dirate)
            for dirate in range(2, 6)
        ]

        self.correlation_sampler = SpatialCorrelationSampler(kernel_size=1,
                                                             patch_size=self.P,
                                                             stride=1,
                                                             padding=0,
                                                             dilation=1)
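For reference, with kernel_size=1, stride=1 and padding=0 the sampler above returns a 5-D volume of shape [B, P, P, H, W], which colorization code typically flattens to [B, P*P, H, W]. A minimal standalone shape check (random inputs and illustrative sizes, not taken from the original project):

import torch
from spatial_correlation_sampler import SpatialCorrelationSampler

R = 6
P = 2 * R + 1  # 13, as in the Colorizer above
sampler = SpatialCorrelationSampler(kernel_size=1, patch_size=P,
                                    stride=1, padding=0, dilation=1)
a = torch.randn(2, 64, 32, 32)      # [B, C, H, W]
b = torch.randn(2, 64, 32, 32)
out = sampler(a, b)                 # [2, 13, 13, 32, 32]
out = out.view(2, P * P, 32, 32)    # flattened to [B, N, H, W], N = P * P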
Example #2
    def __init__(self):
        super(Net, self).__init__()
        self.args = pwcnet_args()
        args = self.args

        self.feature_pyramid_extractor = FeaturePyramidExtractor(args).to(args.device)
        
        self.warping_layer = WarpingLayer(args)
        self.corr = SpatialCorrelationSampler(kernel_size=1, patch_size=(args.search_range*2)+1, stride=1).to(args.device)
        self.flow_estimators = []
        for l, ch in enumerate(args.lv_chs[::-1]):
            
            layer = OpticalFlowEstimator(args, ch + (args.search_range*2+1)**2 + 2).to(args.device)
            self.add_module(f'FlowEstimator(Lv{l})', layer)
            self.flow_estimators.append(layer)

        if args.context:
            self.context_networks = []
            for l, ch in enumerate(args.lv_chs[::-1]):
                layer = ContextNetwork(args, ch + 2).to(args.device)
                self.add_module(f'ContextNetwork(Lv{l})', layer)
                self.context_networks.append(layer)

        # init: Xavier weights, uniform biases for all (de)conv layers
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.uniform_(m.bias)
Example #3
def cross_correlate(x, y, max_distance=9):
    """Efficiently computes the cross correlation of x and y.

    Optimized implementation using correlation_cost.
    Note that we do not normalize by the feature dimension.

    Args:
        x: Float32 tensor of shape [height, width, feature_dim].
        y: Float32 tensor of shape [height, width, feature_dim].
        max_distance: Integer, the maximum distance in pixel coordinates
            per dimension which is considered to be in the search window.

    Returns:
        Float32 tensor of shape [height, width, (2 * max_distance + 1) ** 2].
    """
    # corr_op = Correlation(pad_size=max_distance, kernel_size=1, max_displacement=max_distance, stride1=1, stride2=1, corr_multiply=1)
    corr_op = SpatialCorrelationSampler(kernel_size=1,
                                        patch_size=2 * max_distance + 1,
                                        stride=1,
                                        dilation_patch=1,
                                        padding=0)

    xs = x.permute(2, 0, 1)
    xs = torch.unsqueeze(xs, 0)
    ys = y.permute(2, 0, 1)
    ys = torch.unsqueeze(ys, 0)
    corr = corr_op(xs, ys)
    bs, _, _, hh, ww = corr.size()
    corr = corr.view(bs, -1, hh, ww)
    corr = torch.squeeze(corr, 0)
    corr = corr.permute(1, 2, 0)
    #    feature_dim=x.size()[-1]
    #    corr *= feature_dim
    return corr
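A quick sanity check of the shape contract documented above (random inputs; sizes are illustrative, and the imports from the function above are assumed):

x = torch.randn(48, 64, 128)   # [height, width, feature_dim]
y = torch.randn(48, 64, 128)
corr = cross_correlate(x, y, max_distance=9)
print(corr.shape)              # torch.Size([48, 64, 361]); 361 = (2 * 9 + 1) ** 2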
Example #4
def correlation1d_cost(
    reference_fm,
    target_fm,
    max_disp=192,
    start_disp=0,
    dilation=1,
    disp_sample=None,
    kernel_size=1,
    stride=1,
    padding=0,
    dilation_patch=1,
):
    # For a pixel of the left image at (x, y), this computes a correlation cost
    # volume with pixels of the right image at (xr, y), where xr is in
    # [x - max_disp, x + max_disp]; only the left half, [x - max_disp, x], is needed.
    correlation_sampler = SpatialCorrelationSampler(
        patch_size=(1, max_disp * 2 - 1),
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation_patch=dilation_patch)
    # [B, 1, max_disp*2-1, H, W]
    out = correlation_sampler(reference_fm, target_fm)

    # [B, max_disp*2-1, H, W]
    out = out.squeeze(1)

    # [B, max_disp, H, W], grab the left half of the search range
    out = out[:, :max_disp, :, :]

    cost = F.leaky_relu(out, negative_slope=0.1, inplace=True)

    return cost
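A hedged usage sketch for the cost volume above (feature-map sizes are illustrative, and the torch / F imports from the surrounding code are assumed); note that only max_disp of the 2 * max_disp - 1 sampled horizontal offsets are kept:

left = torch.randn(1, 32, 16, 32)    # reference (left) features, [B, C, H, W]
right = torch.randn(1, 32, 16, 32)   # target (right) features
cost = correlation1d_cost(left, right, max_disp=8)
print(cost.shape)                    # torch.Size([1, 8, 16, 32])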
Example #5
    def __init__(self):
        super(FlowNet, self).__init__()

        print('FlowNet...')

        self.debug = False
        # self.debug = True
        
        self.heatmap_size = hyp.flow_patch_size
        # self.compress_dim = 8
        # self.scales = [0.25, 0.5, 1.0]
        self.scales = [0.125, 0.25, 0.5, 0.75, 1.0]
        self.num_scales = len(self.scales)
        
        # NOTE: slightly different from FlowNet: one set of params is shared across all scales.
        # self.compressor = nn.Sequential(
        #     nn.Conv3d(in_channels=32, out_channels=self.compress_dim, kernel_size=1, stride=1, padding=0),
        # )
        self.correlation_sampler = SpatialCorrelationSampler(
            kernel_size=1,
            patch_size=self.heatmap_size,
            stride=1,
            padding=0,
            dilation_patch=1,
        )
        self.flow_predictor = nn.Sequential(
            nn.Conv3d(in_channels=(self.heatmap_size**3), out_channels=64, kernel_size=1, stride=1, padding=0),
            nn.LeakyReLU(),
            nn.Conv3d(in_channels=64, out_channels=3, kernel_size=1, stride=1, padding=0),
        )
        self.smoothl1 = torch.nn.SmoothL1Loss(reduction='none')
        self.smoothl1_mean = torch.nn.SmoothL1Loss(reduction='mean')
Example #6
 def corr(self, refimg_fea, targetimg_fea, maxdisp, fac=1):
     """
     Correlation function. Adapted from https://github.com/ClementPinard/Pytorch-Correlation-extension
     Faster, but the backward pass is not implemented.
     """
     from spatial_correlation_sampler import SpatialCorrelationSampler
     corr = SpatialCorrelationSampler(kernel_size=1,
                                      patch_size=(int(1 + 2 * maxdisp // fac), int(1 + 2 * maxdisp)),
                                      stride=1, padding=0, dilation_patch=1)
     cost = corr(refimg_fea, targetimg_fea)
     cost = F.leaky_relu(cost, 0.1, inplace=True)
     return cost
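Note the rectangular patch above: the vertical search range shrinks by fac while the horizontal range stays 1 + 2 * maxdisp. A standalone shape check under those parameters (maxdisp=4, fac=2; input sizes are illustrative):

import torch
from spatial_correlation_sampler import SpatialCorrelationSampler

sampler = SpatialCorrelationSampler(kernel_size=1, patch_size=(5, 9),
                                    stride=1, padding=0, dilation_patch=1)
out = sampler(torch.randn(1, 16, 24, 24), torch.randn(1, 16, 24, 24))
print(out.shape)  # torch.Size([1, 5, 9, 24, 24])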
Example #7
    def __init__(self):
        super(FlowNet, self).__init__()

        print('FlowNet...')

        self.debug = False
        # self.debug = True

        self.heatmap_size = hyp.flow_heatmap_size

        self.scales = [1.0]
        self.num_scales = len(self.scales)
        assert (self.num_scales == 1
                )  # do not touch scales, unless you adjust the masking

        dilation = 1
        grid_z, grid_y, grid_x = utils_basic.meshgrid3D(
            1, self.heatmap_size, self.heatmap_size, self.heatmap_size)

        self.max_disp = int(dilation * (self.heatmap_size - 1) / 2)
        if self.debug:
            print('max_disp', self.max_disp)

        self.grid = torch.stack([grid_z, grid_y, grid_x], dim=1) - int(
            self.heatmap_size / 2)
        # this is 1 x 3 x H x H x H, with 0 at the center
        self.grid = self.grid.reshape(1, 3, -1, 1, 1, 1) * dilation
        # now ready to be multiplied with the correlation output

        # self.compress_dim = 16
        # self.compressor = nn.Sequential(
        #     nn.Conv3d(in_channels=hyp.feat_dim, out_channels=self.compress_dim, kernel_size=1, stride=1, padding=0),
        # )

        self.correlation_sampler = SpatialCorrelationSampler(
            kernel_size=1,
            patch_size=self.heatmap_size,
            stride=1,
            padding=0,
            dilation_patch=dilation,
        ).cuda()

        # self.flow_predictor = nn.Sequential(
        #     nn.Conv3d(in_channels=(self.heatmap_size**3), out_channels=64, kernel_size=3, stride=1, padding=1),
        #     nn.LeakyReLU(negative_slope=0.1),
        #     nn.Conv3d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
        #     nn.LeakyReLU(negative_slope=0.1),
        #     nn.Conv3d(in_channels=64, out_channels=3, kernel_size=1, stride=1, padding=0),
        # ).cuda()

        self.smoothl1 = torch.nn.SmoothL1Loss(reduction='none')
        self.smoothl1_mean = torch.nn.SmoothL1Loss(reduction='mean')
        self.mse = torch.nn.MSELoss(reduction='none')
        self.mse_mean = torch.nn.MSELoss(reduction='mean')
Example #8
    def __init__(self):
        super(Colorizer, self).__init__()
        self.D = 4
        self.R = 6  # window size
        self.C = 16
        self.P = self.R * 2 + 1

        self.correlation_sampler = SpatialCorrelationSampler(kernel_size=1,
                                                             patch_size=self.P,
                                                             stride=1,
                                                             padding=0,
                                                             dilation=1)
Example #9
    def __init__(self,
                 D=4,
                 R=6,
                 C=32,
                 mode='faster',
                 training=False,
                 ksargmax=True):
        super(Colorizer, self).__init__()
        self.D = D
        self.R = R  # window size
        self.C = C

        self.P = self.R * 2 + 1
        self.N = self.P * self.P
        self.count = 0

        self.training = training
        self.mode = mode
        self.beta = 50
        self.kernel_sigma = 1.0
        self.ksargmax = ksargmax
        self.memory_patch_R = 12
        self.memory_patch_P = self.memory_patch_R * 2 + 1
        self.memory_patch_N = self.memory_patch_P * self.memory_patch_P

        self.correlation_sampler_dilated = [
            SpatialCorrelationSampler(kernel_size=1,
                                      patch_size=self.memory_patch_P,
                                      stride=1,
                                      padding=0,
                                      dilation=1,
                                      dilation_patch=dirate)
            for dirate in range(2, 6)
        ]

        self.correlation_sampler = SpatialCorrelationSampler(kernel_size=1,
                                                             patch_size=self.P,
                                                             stride=1,
                                                             padding=0,
                                                             dilation=1)
Example #10
    def __init__(self):
        super(CorrelationHead, self).__init__()

        representation_size = 1024
        feature_map_res = 7
        correlation_patch_size = 16  # FlowNet uses 21; to reuse Faster R-CNN weights this must be 16

        self.name = "CorrelationHead"
        self.roi_heads = None

        self.correlation_layer = SpatialCorrelationSampler(
            patch_size=correlation_patch_size, dilation_patch=2)
        self.fc1 = nn.Linear(correlation_patch_size**2 * feature_map_res**2,
                             representation_size)
        self.fc2 = nn.Linear(representation_size, representation_size)
        self.fc3 = nn.Linear(representation_size, 4)
Example #11
 def __init__(self, ks, patch, stride, pad, patch_dilation):
     super(Matching_layer, self).__init__()
     self.relu = nn.ReLU()
     self.patch = patch
     # Use keyword arguments: the sampler's signature is
     # (kernel_size, patch_size, stride, padding, dilation, dilation_patch),
     # so passing patch_dilation as the fifth positional argument would set
     # `dilation` instead of `dilation_patch`.
     self.correlation_sampler = SpatialCorrelationSampler(kernel_size=ks,
                                                          patch_size=patch,
                                                          stride=stride,
                                                          padding=pad,
                                                          dilation_patch=patch_dilation)
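A minimal usage sketch for the layer above; the argument values are illustrative assumptions, and torch / nn are assumed imported as in the surrounding examples:

layer = Matching_layer(ks=1, patch=15, stride=1, pad=0, patch_dilation=1)
out = layer.correlation_sampler(torch.randn(1, 64, 32, 32),
                                torch.randn(1, 64, 32, 32))
print(out.shape)  # torch.Size([1, 15, 15, 32, 32])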
Example #12
def local_previous_frame_nearest_neighbor_features_per_object(
        prev_frame_embedding,
        query_embedding,
        prev_frame_labels,
        gt_ids,
        max_distance=15):
    """Computes nearest neighbor features while only allowing local matches.
  Args:
    prev_frame_embedding: Tensor of shape [height, width, embedding_dim],
      the embedding vectors for the last frame.
    query_embedding: Tensor of shape [height, width, embedding_dim],
      the embedding vectors for the query frames.
    prev_frame_labels: Tensor of shape [height, width, 1], the class labels of
      the previous frame.
    gt_ids: Int Tensor of shape [n_objs] of the sorted unique ground truth
      ids in the first frame.
    max_distance: Integer, the maximum distance allowed for local matching.
  Returns:
    nn_features: A float32 np.array of nearest neighbor features of shape
      [1, height, width, n_objects, 1].
    """
    if USE_CORRELATION_COST:
        d = local_pairwise_distances(query_embedding,
                                     prev_frame_embedding,
                                     max_distance=max_distance)
    else:
        d = local_pairwise_distances2(query_embedding,
                                      prev_frame_embedding,
                                      max_distance=max_distance)
#    d = (torch.sigmoid(d) - 0.5) * 2
    height, width = prev_frame_embedding.size()[:2]

    if USE_CORRELATION_COST:
        #  corr_op=Correlation(pad_size=max_distance, kernel_size=1, max_displacement=max_distance, stride1=1, stride2=1, corr_multiply=1)
        corr_op = SpatialCorrelationSampler(kernel_size=1,
                                            patch_size=2 * max_distance + 1,
                                            stride=1,
                                            dilation_patch=1,
                                            padding=0)
        # New, faster code with cross-correlation via correlation_cost.
        # Due to padding we have to add 1 to the labels.
        tmp_prev_frame_labels = (prev_frame_labels + 1).float().permute(
            2, 0, 1)
        tmp_prev_frame_labels = torch.unsqueeze(tmp_prev_frame_labels, 0)

        ones_ = torch.ones_like(tmp_prev_frame_labels)
        offset_labels = corr_op(ones_, tmp_prev_frame_labels)
        bs, _, _, hh, ww = offset_labels.size()
        offset_labels = offset_labels.view(bs, -1, hh, ww)

        offset_labels = torch.squeeze(offset_labels, 0)
        offset_labels = offset_labels.permute(1, 2, 0)
        offset_labels = torch.unsqueeze(offset_labels, 3)
        offset_labels = torch.round(offset_labels - 1)
        offset_masks = torch.eq(
            offset_labels,
            gt_ids.float().unsqueeze(0).unsqueeze(0).unsqueeze(0))

    else:
        masks = torch.eq(prev_frame_labels, gt_ids.unsqueeze(0).unsqueeze(0))
        padded_masks = nn.functional.pad(masks, (
            0,
            0,
            max_distance,
            max_distance,
            max_distance,
            max_distance,
        ))
        offset_masks = []
        for y_start in range(2 * max_distance + 1):
            y_end = y_start + height
            masks_slice = padded_masks[y_start:y_end]
            for x_start in range(2 * max_distance + 1):
                x_end = x_start + width
                offset_mask = masks_slice[:, x_start:x_end]
                offset_masks.append(offset_mask)
        offset_masks = torch.stack(offset_masks, dim=2)
#    pad = torch.ones((height, width, (2 * max_distance + 1) ** 2, gt_ids.size(0)))
    d_tiled = d.unsqueeze(-1).repeat((1, 1, 1, gt_ids.size(0)))
    pad = torch.ones_like(d_tiled)
    #    if torch.cuda.is_available():
    #        pad=pad.cuda()
    #        d_tiled = d_tiled.cuda()
    d_masked = torch.where(offset_masks, d_tiled, pad)
    dists, _ = torch.min(d_masked, dim=2)
    dists = dists.view(1, height, width, gt_ids.size(0), 1)
    return dists
Example #13
File: check.py  Project: xdr940/utils
parser.add_argument('-k', '--kernel-size', type=int, default=3)
parser.add_argument('--patch', type=int, default=3)
parser.add_argument('--patch_dilation', type=int, default=2)
parser.add_argument('-c', '--channel', type=int, default=10)
parser.add_argument('--height', type=int, default=10)
parser.add_argument('-w', '--width', type=int, default=10)
parser.add_argument('-s', '--stride', type=int, default=2)
parser.add_argument('-p', '--pad', type=int, default=5)
parser.add_argument('-v', '--verbose', action='store_true')
args = parser.parse_args()

assert (torch.cuda.is_available()), "no comparison to make"
device = torch.device("cuda")

input1 = torch.randn(args.batch_size, args.channel, args.height,
                     args.width).double()
input2 = torch.randn(args.batch_size, args.channel, args.height,
                     args.width).double()
input1.requires_grad = True
input2.requires_grad = True

# Keyword arguments keep patch_dilation from binding to the sampler's
# `dilation` parameter (signature: kernel_size, patch_size, stride,
# padding, dilation, dilation_patch).
correlation_sampler = SpatialCorrelationSampler(kernel_size=args.kernel_size,
                                                patch_size=args.patch,
                                                stride=args.stride,
                                                padding=args.pad,
                                                dilation_patch=args.patch_dilation)

if 'forward' in args.direction:
    check_forward(input1, input2, correlation_sampler, args.verbose)

if 'backward' in args.direction:
    check_backward(input1, input2, correlation_sampler, args.verbose)
Example #14
    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=(3, 5, 5),
                 layer_strides=(2, 2, 2),
                 num_filters=(128, 128, 256),
                 upsample_strides=(1, 2, 4),
                 num_upsample_filters=(256, 256, 256),
                 num_input_features=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 box_code_size=7,
                 num_direction_bins=2,
                 corr_patch_size=9,
                 corr_kernel_size=3,
                 corr_dilation_patch=1,
                 voting_range=1,
                 name='rpn'):
        """upsample_strides support float: [0.25, 0.5, 1]
        if upsample_strides < 1, conv2d will be used instead of convtranspose2d.
        """
        super(RPNBase_tracking, self).__init__(
            use_norm=use_norm,
            num_class=num_class,
            layer_nums=layer_nums,
            layer_strides=layer_strides,
            num_filters=num_filters,
            upsample_strides=upsample_strides,
            num_upsample_filters=num_upsample_filters,
            num_input_features=num_input_features,
            num_anchor_per_loc=num_anchor_per_loc,
            encode_background_as_zeros=encode_background_as_zeros,
            use_direction_classifier=use_direction_classifier,
            use_groupnorm=use_groupnorm,
            num_groups=num_groups,
            box_code_size=box_code_size,
            num_direction_bins=num_direction_bins,
            corr_patch_size=corr_patch_size,
            corr_kernel_size=corr_kernel_size,
            corr_dilation_patch=corr_dilation_patch,
            voting_range=voting_range,
            name=name)
        self._num_anchor_per_loc = num_anchor_per_loc
        self._num_direction_bins = num_direction_bins
        self._num_class = num_class
        self._use_direction_classifier = use_direction_classifier
        self._box_code_size = box_code_size
        self._corr_patch_size = corr_patch_size
        self._corr_kernel_size = corr_kernel_size
        self._corr_dilation_patch = corr_dilation_patch
        self._voting_range = voting_range

        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        if len(num_upsample_filters) == 0:
            final_num_filters = self._num_out_filters
        else:
            final_num_filters = sum(num_upsample_filters)
        self.conv_cls = nn.Conv2d(final_num_filters * 2, num_cls, 1)
        self.conv_box = nn.Conv2d(final_num_filters * 2,
                                  num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(
                final_num_filters * 2, num_anchor_per_loc * num_direction_bins,
                1)
        '''
        - Output sizes oH and oW no longer depend on patch size, only on kernel size and padding.
        - patch_size is now the whole patch, not only the radius.
        - stride1 is now stride.
        - stride2 is dilation_patch, which behaves like dilation in dilated convolutions.
        - The equivalent max_displacement is then dilation_patch * (patch_size - 1) / 2.
        '''
        self.correlation_sampler = SpatialCorrelationSampler(
            kernel_size=self._corr_kernel_size,
            patch_size=self._corr_patch_size,
            stride=1,
            # padding=1 + self._corr_kernel_size // 3,
            padding=2,
            dilation_patch=self._corr_dilation_patch)
        self.resample = Resample2d()
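The migration note above gives the mapping back to the old Correlation module; under the default parameters used here, the equivalent max_displacement works out as follows:

# equivalent max_displacement = dilation_patch * (patch_size - 1) / 2
corr_patch_size = 9       # default corr_patch_size above
corr_dilation_patch = 1   # default corr_dilation_patch above
print(corr_dilation_patch * (corr_patch_size - 1) // 2)  # 4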
Example #15
    def __init__(self, md=4):
        """
        input: md --- maximum displacement (for correlation, default: 4), after warping
        """
        super(FlowNetPwcLite2xA, self).__init__()

        # self.conv1a  = conv(1,   8, kernel_size=3, stride=2)
        # self.conv1aa = conv(8,  8, kernel_size=3, stride=1)
        # self.conv1b  = conv(8,  8, kernel_size=3, stride=1)
        self.conv2a = conv(1, 8, kernel_size=3, stride=2)
        self.conv2aa = conv(8, 8, kernel_size=3, stride=1)
        self.conv2b = conv(8, 8, kernel_size=3, stride=1)
        self.conv3a = conv(8, 32, kernel_size=3, stride=2)
        self.conv3aa = conv(32, 32, kernel_size=3, stride=1)
        self.conv3b = conv(32, 32, kernel_size=3, stride=1)
        self.conv4a = conv(32, 64, kernel_size=3, stride=2)
        self.conv4aa = conv(64, 64, kernel_size=3, stride=1)
        self.conv4b = conv(64, 32, kernel_size=3, stride=1)
        self.conv5a = conv(32, 64, kernel_size=3, stride=2)
        self.conv5aa = conv(64, 64, kernel_size=3, stride=1)
        self.conv5b = conv(64, 32, kernel_size=3, stride=1)
        self.conv6aa = conv(32, 64, kernel_size=3, stride=2)
        self.conv6a = conv(64, 64, kernel_size=3, stride=1)
        self.conv6b = conv(64, 64, kernel_size=3, stride=1)

        # self.corr = Correlation(pad_size=md, kernel_size=1, max_displacement=md, stride1=1, stride2=1, corr_multiply=1)
        md2plus1 = md * 2 + 1
        self.corr6 = SpatialCorrelationSampler(patch_size=md2plus1,
                                               kernel_size=1,
                                               stride=1)
        # smaller search window for the finer pyramid levels
        md2plus1N = (md - 1) * 2 + 1
        self.corrN = SpatialCorrelationSampler(patch_size=md2plus1N,
                                               kernel_size=1,
                                               stride=1)
        self.leakyRELU = nn.LeakyReLU(0.1)

        nd = md2plus1**2
        dd = np.cumsum([128, 128, 96, 64, 32])

        od = nd
        self.conv6_0 = conv(od, 128, kernel_size=3, stride=1)
        self.conv6_1 = conv(od + dd[0], 128, kernel_size=3, stride=1)
        self.conv6_2 = conv(od + dd[1], 96, kernel_size=3, stride=1)
        self.conv6_3 = conv(od + dd[2], 64, kernel_size=3, stride=1)
        self.conv6_4 = conv(od + dd[3], 32, kernel_size=3, stride=1)
        self.predict_flow6 = predict_flow(od + dd[4])
        self.deconv6 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
        self.upfeat6 = deconv(od + dd[4],
                              16,
                              kernel_size=4,
                              stride=2,
                              padding=1)

        nd = md2plus1N**2

        od = nd + 32 + 2 + 16
        self.conv5_0 = conv(od, 128, kernel_size=3, stride=1)
        self.conv5_1 = conv(od + dd[0], 128, kernel_size=3, stride=1)
        self.conv5_2 = conv(od + dd[1], 96, kernel_size=3, stride=1)
        self.conv5_3 = conv(od + dd[2], 64, kernel_size=3, stride=1)
        self.conv5_4 = conv(od + dd[3], 32, kernel_size=3, stride=1)
        self.predict_flow5 = predict_flow(od + dd[4])
        self.deconv5 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
        self.upfeat5 = deconv(od + dd[4],
                              16,
                              kernel_size=4,
                              stride=2,
                              padding=1)

        od = nd + 32 + 2 + 16
        self.conv4_0 = conv(od, 128, kernel_size=3, stride=1)
        self.conv4_1 = conv(od + dd[0], 128, kernel_size=3, stride=1)
        self.conv4_2 = conv(od + dd[1], 96, kernel_size=3, stride=1)
        self.conv4_3 = conv(od + dd[2], 64, kernel_size=3, stride=1)
        self.conv4_4 = conv(od + dd[3], 32, kernel_size=3, stride=1)
        self.predict_flow4 = predict_flow(od + dd[4])
        self.deconv4 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
        self.upfeat4 = deconv(od + dd[4],
                              16,
                              kernel_size=4,
                              stride=2,
                              padding=1)

        od = nd + 32 + 2 + 16
        self.conv3_0 = conv(od, 128, kernel_size=3, stride=1)
        self.conv3_1 = conv(od + dd[0], 128, kernel_size=3, stride=1)
        self.conv3_2 = conv(od + dd[1], 96, kernel_size=3, stride=1)
        self.conv3_3 = conv(od + dd[2], 64, kernel_size=3, stride=1)
        self.conv3_4 = conv(od + dd[3], 32, kernel_size=3, stride=1)
        self.predict_flow3 = predict_flow(od + dd[4])
        self.deconv3 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
        self.upfeat3 = deconv(od + dd[4],
                              16,
                              kernel_size=4,
                              stride=2,
                              padding=1)

        od = nd + 8 + 2 + 16
        self.conv2_0 = conv(od, 128, kernel_size=3, stride=1)
        self.conv2_1 = conv(od + dd[0], 128, kernel_size=3, stride=1)
        self.conv2_2 = conv(od + dd[1], 96, kernel_size=3, stride=1)
        self.conv2_3 = conv(od + dd[2], 64, kernel_size=3, stride=1)
        self.conv2_4 = conv(od + dd[3], 32, kernel_size=3, stride=1)
        self.predict_flow2 = predict_flow(od + dd[4])
        self.deconv2 = deconv(2, 2, kernel_size=4, stride=2, padding=1)

        self.dc_conv1 = conv(od + dd[4],
                             128,
                             kernel_size=3,
                             stride=1,
                             padding=1,
                             dilation=1)
        self.dc_conv2 = conv(128,
                             128,
                             kernel_size=3,
                             stride=1,
                             padding=2,
                             dilation=2)
        self.dc_conv3 = conv(128,
                             128,
                             kernel_size=3,
                             stride=1,
                             padding=4,
                             dilation=4)
        self.dc_conv4 = conv(128,
                             96,
                             kernel_size=3,
                             stride=1,
                             padding=8,
                             dilation=8)
        self.dc_conv5 = conv(96,
                             64,
                             kernel_size=3,
                             stride=1,
                             padding=16,
                             dilation=16)
        self.dc_conv6 = conv(64,
                             32,
                             kernel_size=3,
                             stride=1,
                             padding=1,
                             dilation=1)
        self.dc_conv7 = predict_flow(32)

        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
                nn.init.kaiming_normal_(m.weight.data, mode='fan_in')
                if m.bias is not None:
                    m.bias.data.zero_()
Example #16
if args.dtype == 'half':  # reconstructed opening branch of the truncated snippet
    dtype = torch.float16
elif args.dtype == 'float':
    dtype = torch.float32
else:
    dtype = torch.float64

input1 = torch.randn(args.batch_size,
                     args.channel,
                     args.height,
                     args.width,
                     dtype=dtype,
                     device=device,
                     requires_grad=True)
input2 = torch.randn_like(input1)

correlation_sampler = SpatialCorrelationSampler(args.kernel_size, args.patch,
                                                args.stride, args.pad,
                                                args.dilation,
                                                args.patch_dilation)

# Force CUDA initialization
output = correlation_sampler(input1, input2)
print(output.size())
output.mean().backward()
forward_min = float('inf')
forward_time = 0
backward_min = float('inf')
backward_time = 0
for _ in trange(args.runs):
    correlation_sampler.zero_grad()

    start = time.time()
    output = correlation_sampler(input1, input2)
Example #17
    def __init__(self, use_loc=False, use_trk=False):
        super(STNNet, self).__init__()
        self.loc = use_loc
        self.trk = use_trk
        # backbone
        self.backbone = BaseNet()
        self.relu = nn.ReLU()

        # multi-scale combination
        self.deconv1 = nn.ConvTranspose2d(512,
                                          256,
                                          3,
                                          stride=2,
                                          padding=1,
                                          output_padding=1,
                                          dilation=2)
        self.deconv2 = nn.ConvTranspose2d(256,
                                          128,
                                          3,
                                          stride=2,
                                          padding=1,
                                          output_padding=1,
                                          dilation=2)

        self.conv1 = nn.Conv2d(512, 256, 1)
        self.conv2 = nn.Conv2d(256, 256, 3, padding=1, dilation=1)
        self.conv3 = nn.Conv2d(256, 128, 1)
        self.conv4 = nn.Conv2d(128, 128, 3, padding=1, dilation=1)

        # attention layers
        self.s_weight1 = SpatialWeightLayer()
        self.s_weight2 = SpatialWeightLayer()
        self.s_weight3 = SpatialWeightLayer()

        # density output layers
        self.den_output_layer1 = nn.Conv2d(128, 1, kernel_size=1)
        self.den_output_layer2 = nn.Conv2d(256, 1, kernel_size=1)
        self.den_output_layer3 = nn.Conv2d(512, 1, kernel_size=1)

        # fix above parameters when training the association heads
        if self.trk:
            for para in self.parameters():
                para.requires_grad = False

        if self.loc:
            # localization output layers
            self.loc_output_layer1 = nn.Conv2d(128,
                                               2,
                                               3,
                                               padding=1,
                                               dilation=1)
            self.loc_output_layer2 = nn.Conv2d(256,
                                               2,
                                               3,
                                               padding=1,
                                               dilation=1)
            self.loc_output_layer3 = nn.Conv2d(512,
                                               2,
                                               3,
                                               padding=1,
                                               dilation=1)
            self.reg_output_layer1 = nn.Conv2d(128,
                                               2,
                                               3,
                                               padding=1,
                                               dilation=1)
            self.reg_output_layer2 = nn.Conv2d(256,
                                               2,
                                               3,
                                               padding=1,
                                               dilation=1)
            self.reg_output_layer3 = nn.Conv2d(512,
                                               2,
                                               3,
                                               padding=1,
                                               dilation=1)

            self.s_weight_loc = nn.Conv2d(6, 6, 3, padding=1)
            self.merge_loc = nn.Conv2d(6, 2, 1, padding=0)
            self.s_weight_reg = nn.Conv2d(6, 6, 3, padding=1)
            self.merge_reg = nn.Conv2d(6, 2, 1, padding=0)

        if self.trk:
            # tracking output layers
            self.corr_layer1 = SpatialCorrelationSampler(1, 11, 1, 0, 1)
            self.corr_layer2 = SpatialCorrelationSampler(1, 11, 1, 0, 1)
            self.corr_layer3 = SpatialCorrelationSampler(1, 11, 1, 0, 1)
            self.trk_output_layer1 = nn.Conv2d(363,
                                               128,
                                               3,
                                               padding=1,
                                               dilation=1)
            self.trk_output_layer2 = nn.Conv2d(128,
                                               64,
                                               3,
                                               padding=1,
                                               dilation=1)
            # graph layers
            self.gcn_layers = PointConvDensityClsSsg(num_classes=2, num_pt=128)

        # load ImageNet-pretrained VGG-16 weights into the backbone,
        # copied positionally layer by layer
        mod = models.vgg16(pretrained=True)
        self._initialize_weights()
        my_models = self.backbone.state_dict()
        pre_models = list(mod.state_dict().items())
        for count, (layer_name, value) in enumerate(my_models.items()):
            prelayer_name, pre_weights = pre_models[count]
            my_models[layer_name] = pre_weights
        self.backbone.load_state_dict(my_models)