def predict_frame(self, oriImg):
        test_image = Variable(T.transpose(T.transpose(T.unsqueeze(torch.from_numpy(oriImg).float(), 0), 2, 3), 1, 2), volatile=True).cuda()
        # print('Input Image Size: ', test_image.size())

        # Multiplier: a pyramid-based scaling scheme that evaluates the image at multiple scales.
        multiplier = [x * self.model_['boxsize'] / oriImg.shape[0] for x in self.param_['scale_search']]
        # print('Image Scaling Multipliers: ', multiplier, '\n')

        # Heatmap and Parts Affinity Field Data Structures
        heatmap_avg = torch.zeros((len(multiplier),19,oriImg.shape[0], oriImg.shape[1])).cuda()
        paf_avg = torch.zeros((len(multiplier),38,oriImg.shape[0], oriImg.shape[1])).cuda()

        # Compute Keypoint and Part Affinity Fields
        # print('Generating Keypoint Heatmap and Parts Affinity Field Predictions...')
        for m in range(len(multiplier)):
            # Set Image Scale
            scale = multiplier[m]
            h = int(oriImg.shape[0] * scale)
            w = int(oriImg.shape[1] * scale)
            # print('[', 'Multiplier: ', scale, '-', (w, h), ']')

            # Pad Image Corresponding to Detection Stride
            pad_h = 0 if (h % self.model_['stride'] == 0) else self.model_['stride'] - (h % self.model_['stride'])
            pad_w = 0 if (w % self.model_['stride'] == 0) else self.model_['stride'] - (w % self.model_['stride'])
            new_h = h + pad_h
            new_w = w + pad_w

            # Apply Image Resize Transformation
            imageToTest = cv2.resize(oriImg, (0,0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, self.model_['stride'], self.model_['padValue'])
            imageToTest_padded = np.transpose(np.float32(imageToTest_padded[:,:,:,np.newaxis]), (3,2,0,1))/256 - 0.5

            # Generate Predictions
            feed = Variable(T.from_numpy(imageToTest_padded)).cuda()
            output1, output2 = self.model(feed)

            # Scale Prediction Outputs to Corresponding Image Size
            heatmap = nn.UpsamplingBilinear2d((oriImg.shape[0], oriImg.shape[1])).cuda()(output2)
            paf = nn.UpsamplingBilinear2d((oriImg.shape[0], oriImg.shape[1])).cuda()(output1)

            # print('Heatmap Dim:', heatmap.size())   # (1, Joint Count, X, Y)
            # print('PAF Dim:', paf.size())           # (1, PAF Count, X, Y)
            # print()

            heatmap_avg[m] = heatmap[0].data
            paf_avg[m] = paf[0].data

        # Compute Average Values
        heatmap_avg = T.transpose(T.transpose(T.squeeze(T.mean(heatmap_avg, 0)),0,1),1,2).cuda()
        paf_avg = T.transpose(T.transpose(T.squeeze(T.mean(paf_avg, 0)),0,1),1,2).cuda()

        # Convert to Numpy Type
        heatmap_avg = heatmap_avg.cpu().numpy()
        paf_avg = paf_avg.cpu().numpy()

        '''
        # [Plotting & Visualizing Heatmap and PAF]

        # Plot Heatmap Probabilities
        # util.plot_heatmap(oriImg, heatmap_avg)
        # util.plot_joint_heatmap(oriImg, heatmap_avg, 1)

        # Plot Part-Affinity Vectors
        # util.plot_paf(oriImg, paf_avg, 4)
        '''

        # Compute Heatmap Peaks (Using Non-Maximum Suppression)
        all_peaks = []
        peak_counter = 0
        joint_pt_lookup = dict()
        for part in range(18):
            # Smooth out the heatmap with a Gaussian kernel to remove high-frequency variation.
            map_ori = heatmap_avg[:, :, part]
            map_smooth = gaussian_filter(map_ori, sigma=3)  # renamed to avoid shadowing the built-in `map`

            map_left = np.zeros(map_smooth.shape)
            map_left[1:, :] = map_smooth[:-1, :]
            map_right = np.zeros(map_smooth.shape)
            map_right[:-1, :] = map_smooth[1:, :]
            map_up = np.zeros(map_smooth.shape)
            map_up[:, 1:] = map_smooth[:, :-1]
            map_down = np.zeros(map_smooth.shape)
            map_down[:, :-1] = map_smooth[:, 1:]

            # Keep pixels that dominate their four neighbours and exceed the binary threshold.
            peaks_binary = np.logical_and.reduce((map_smooth >= map_left, map_smooth >= map_right, map_smooth >= map_up, map_smooth >= map_down, map_smooth > self.param_['thre1']))
            peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # (x, y); note the axis reversal

            # Derive Joint Keypoint Peaks with Mapped IDs and Probabilities
            peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
            ids = range(peak_counter, peak_counter + len(peaks))
            peaks_with_score_and_id = [peaks_with_score[i] + (ids[i],) for i in range(len(ids))]

            # Create Joint Lookup Dictionary
            for pt in peaks_with_score_and_id:
                joint_pt_lookup[(pt[1], pt[0])] = pt[2:4]

            all_peaks.append(peaks_with_score_and_id)
            peak_counter += len(peaks)

        '''
        # [Plot KeyPoint (with Probabilities)]
        # util.plot_key_point(oriImg, all_peaks)
        '''
        # util.plot_all_keypoints(oriImg, all_peaks)

        # Load Joint Index and Sequences Data
        mapIdx = self.md.get_mapIdx()
        limbSeq = self.md.get_limbseq()

        # Compute Part-Affinity Fields
        connection_all = []
        special_k = []
        mid_num = 10

        for k in range(len(mapIdx)):
            score_mid = paf_avg[:,:,[x-19 for x in mapIdx[k]]]
            # print(score_mid.shape)

            candA = all_peaks[limbSeq[k][0]-1]
            candB = all_peaks[limbSeq[k][1]-1]
            # print('Limb Seq Connection: [', limbSeq[k][0]-1, ',', limbSeq[k][1]-1, ']\n')

            nA = len(candA)
            nB = len(candB)
            indexA, indexB = limbSeq[k]

            if nA != 0 and nB != 0:
                connection_candidate = []
                for i in range(nA):
                    for j in range(nB):

                        # Compute Joint Unit Vector
                        vec = np.subtract(candB[j][:2], candA[i][:2])
                        norm = math.sqrt(vec[0]*vec[0] + vec[1]*vec[1])
                        # Skip degenerate pairs whose keypoints coincide (zero-length vector).
                        if norm == 0:
                            # print('Exception: Norm is a zero-vector')
                            continue

                        # TODO: Save this vector!
                        vec = np.divide(vec, norm)
                        #print('Unit Vector: [',i, ', ', j, ']: ', str(vec))

                        startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), np.linspace(candA[i][1], candB[j][1], num=mid_num)))
                        vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] for I in range(len(startend))])
                        vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] for I in range(len(startend))])

                        # Compute Components for Affinity Field Criterion
                        score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                        score_with_dist_prior = sum(score_midpts)/len(score_midpts) + min(0.5*oriImg.shape[0]/norm-1, 0)

                        # Check PAF Criterion
                        criterion1 = len(np.nonzero(score_midpts > self.param_['thre2'])[0]) > 0.8 * len(score_midpts)
                        criterion2 = score_with_dist_prior > 0
                        if criterion1 and criterion2:
                            connection_candidate.append([i, j, score_with_dist_prior, score_with_dist_prior+candA[i][2]+candB[j][2]])

                connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
                connection = np.zeros((0,5))

                for c in range(len(connection_candidate)):
                    i, j, s = connection_candidate[c][0:3]
                    if (i not in connection[:,3] and j not in connection[:,4]):
                        connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                        if len(connection) >= min(nA, nB): break

                connection_all.append(connection)

                #print('\nConnections:')
                #print(connection)
                #print()
            else:
                # Handle Exception for Potential Missing Part Entities
                special_k.append(k)
                connection_all.append([])

        # Build Human Pose
        subset = -1 * np.ones((0, 20))
        candidate = np.array([item for sublist in all_peaks for item in sublist])

        for k in range(len(mapIdx)):
            if k not in special_k:
                partAs = connection_all[k][:,0]
                partBs = connection_all[k][:,1]
                indexA, indexB = np.array(limbSeq[k]) - 1

                for i in range(len(connection_all[k])):
                    found = 0
                    subset_idx = [-1, -1]

                    for j in range(len(subset)):
                        if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                            subset_idx[found] = j
                            found += 1

                    if found == 1:
                        j = subset_idx[0]
                        if subset[j][indexB] != partBs[i]:
                            subset[j][indexB] = partBs[i]
                            subset[j][-1] += 1
                            subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                    elif found == 2: # if found 2 and disjoint, merge them
                        j1, j2 = subset_idx
                        # print "found = 2"
                        membership = ((subset[j1]>=0).astype(int) + (subset[j2]>=0).astype(int))[:-2]
                        if len(np.nonzero(membership == 2)[0]) == 0: #merge
                            subset[j1][:-2] += (subset[j2][:-2] + 1)
                            subset[j1][-2:] += subset[j2][-2:]
                            subset[j1][-2] += connection_all[k][i][2]
                            subset = np.delete(subset, j2, 0)
                        else: # as like found == 1
                            subset[j1][indexB] = partBs[i]
                            subset[j1][-1] += 1
                            subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

                    # if find no partA in the subset, create a new subset
                    elif not found and k < 17:
                        row = -1 * np.ones(20)
                        row[indexA] = partAs[i]
                        row[indexB] = partBs[i]
                        row[-1] = 2
                        row[-2] = sum(candidate[connection_all[k][i,:2].astype(int), 2]) + connection_all[k][i][2]
                        subset = np.vstack([subset, row])

        # Drop subset rows with fewer than 4 parts or an average part score below 0.4
        deleteIdx = []
        for i in range(len(subset)):
            if subset[i][-1] < 4 or subset[i][-2]/subset[i][-1] < 0.4:
                deleteIdx.append(i)
        subset = np.delete(subset, deleteIdx, axis=0)

        # Setup Pose Dictionary Data Structure for Prediction Return
        joints_per_skeleton = [[] for i in range(len(subset))]
        for n in range(len(subset)):
            for i in range(18):
                cidx = subset[n][i]
                if cidx != -1:
                    y = candidate[cidx.astype(int), 0]
                    x = candidate[cidx.astype(int), 1]
                    joints_per_skeleton[n].append([y, x])
                else:
                    joints_per_skeleton[n].append(None)

        return joints_per_skeleton
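The peak step above is plain four-neighbour non-maximum suppression on a Gaussian-smoothed map. A minimal standalone sketch of the same test (hypothetical find_heatmap_peaks helper; NumPy/SciPy only):

import numpy as np
from scipy.ndimage import gaussian_filter

def find_heatmap_peaks(heatmap, threshold=0.1, sigma=3):
    """Return (x, y) peak coordinates of a single-joint heatmap."""
    smoothed = gaussian_filter(heatmap, sigma=sigma)

    # Shift the map by one pixel in each direction (zero-padded, as above).
    up, down = np.zeros_like(smoothed), np.zeros_like(smoothed)
    left, right = np.zeros_like(smoothed), np.zeros_like(smoothed)
    up[1:, :] = smoothed[:-1, :]
    down[:-1, :] = smoothed[1:, :]
    left[:, 1:] = smoothed[:, :-1]
    right[:, :-1] = smoothed[:, 1:]

    # A peak dominates all four neighbours and clears the threshold.
    peaks = np.logical_and.reduce(
        (smoothed >= up, smoothed >= down, smoothed >= left, smoothed >= right,
         smoothed > threshold))
    ys, xs = np.nonzero(peaks)
    return list(zip(xs, ys))  # (x, y) pairs, matching the convention above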
Example #2
    def forward(self, x):

        sources = list()
        arm_loc = list()
        arm_conf = list()
        feature_h = list()
        feature_l = list()

        # apply vgg up to conv4_3 relu
        for k in range(23):
            x = self.vgg[k](x)

        # s = self.L2Norm(x)
        x = self.L2Norm(x)
        conv4 = self.convs_4(x)
        conv4 = self.bn1(conv4)
        conv4 = self.relu(conv4)
        # conv4 = self.L2Norm(conv4)

        # apply vgg up to fc7
        for k in range(23, len(self.vgg)):
            x = self.vgg[k](x)

        # feature_h.append(conv4)

        # apply spp
        for k, v in enumerate(self.spp):
            if k == 0:
                feature_h.append(v(conv4))
            else:
                feature_h.append(v(x))

        # apply bottleneck
        for k, v in enumerate(self.bottleneck):
            feature_l.append(self.relu(self.bn2(v(feature_h[k]))))
        # apply MSCA
        for i in range(len(feature_h)):
            if i == 0:
                k = i + 1
                temp = []
                for j in range(k, len(feature_h)):
                    upsample = nn.UpsamplingBilinear2d(scale_factor=2**j)
                    temp.append(upsample(feature_l[j]))
                temp.insert(i, feature_h[i])
                sources.append(
                    self.relu(self.bn2(self.convs(torch.cat(temp, 1)))))
            elif i == len(feature_h) - 1:
                k = i + 1
                temp = []
                for j in range(0, i):
                    scale = (1 / 2.)**(i - j)
                    downsample = nn.AdaptiveAvgPool2d(
                        output_size=(int(feature_l[j].size()[2] * scale),
                                     int(feature_l[j].size()[3] * scale)))
                    temp.append(downsample(feature_l[j]))
                    # unsample = nn.UpsamplingBilinear2d(scale_factor=2 ** j)
                    # temp.append(unsample(feature_l[j]))
                temp.insert(i, feature_h[i])
                sources.append(
                    self.relu(self.bn2(self.convs(torch.cat(temp, 1)))))
            else:
                k = i + 1
                temp = []
                for j in range(i):
                    scale = (1 / 2.)**(i - j)
                    downsample = nn.AdaptiveAvgPool2d(
                        output_size=(int(feature_l[j].size()[2] * scale),
                                     int(feature_l[j].size()[3] * scale)))
                    temp.append(downsample(feature_l[j]))
                for j in range(k, len(feature_h)):
                    scale = 2**(j - i)
                    upsample = nn.UpsamplingBilinear2d(scale_factor=scale)
                    temp.append(upsample(feature_l[j]))
                temp.insert(i, feature_h[i])
                sources.append(
                    self.relu(self.bn2(self.convs(torch.cat(temp, 1)))))

        for (x, l, c) in zip(sources, self.pfp_loc, self.pfp_conf):
            # for x in sources:
            # self.convs = nn.Conv2d(x.shape[1], 256, kernel_size=3, stride=1, padding=1, bias=False)
            arm_loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            arm_conf.append(c(x).permute(0, 2, 3, 1).contiguous())
        arm_loc = torch.cat([o.view(o.size(0), -1) for o in arm_loc], 1)
        arm_conf = torch.cat([o.view(o.size(0), -1) for o in arm_conf], 1)

        if self.status == "test":
            output = self.detect(
                arm_loc.view(arm_loc.size(0), -1, 4),  # loc preds
                self.softmax(
                    arm_conf.view(arm_conf.size(0), -1,
                                  self.num_classes)),  # conf preds
                self.priors.type(type(x.data))  # default boxes
            )
        else:
            output = (arm_loc.view(arm_loc.size(0), -1, 4),
                      arm_conf.view(arm_conf.size(0), -1,
                                    self.num_classes), self.priors)

        return output
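The MSCA loop above applies one alignment rule per target level i: maps shallower than i are average-pooled down by 2**(i-j), deeper maps are bilinearly upsampled by 2**(j-i), and the aligned pyramid is concatenated along channels. A minimal sketch with assumed shapes:

import torch
import torch.nn as nn

feats = [torch.randn(1, 256, 64 // 2**k, 64 // 2**k) for k in range(3)]  # pyramid, level 0 largest
i = 1                                                                     # target level
aligned = []
for j, f in enumerate(feats):
    if j < i:    # shallower level: downsample by 2**(i - j)
        h, w = f.shape[2] // 2**(i - j), f.shape[3] // 2**(i - j)
        aligned.append(nn.AdaptiveAvgPool2d((h, w))(f))
    elif j > i:  # deeper level: upsample by 2**(j - i)
        aligned.append(nn.UpsamplingBilinear2d(scale_factor=2**(j - i))(f))
    else:
        aligned.append(f)
fused = torch.cat(aligned, dim=1)
print(fused.shape)  # torch.Size([1, 768, 32, 32]): all levels aligned to level i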
Example #3
    def __init__(self, in_channels, num_classes, base_channels=64):
        super(UNetPlusPlus, self).__init__()
        self.block_i = 0

        # Downsampling
        self.op0_0 = ConvUnit(in_channels, base_channels)
        self.maxpool0 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.op1_0 = ConvUnit(base_channels, base_channels * 2)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.op2_0 = ConvUnit(base_channels * 2, base_channels * 4)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.op3_0 = ConvUnit(base_channels * 4, base_channels * 8)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.op4_0 = ConvUnit(base_channels * 8, base_channels * 16)
        self.maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Upsampling
        self.op0_1 = ConvUnit(base_channels + base_channels * 2, base_channels)
        self.upsample0_1 = nn.UpsamplingBilinear2d(scale_factor=2)
        self.op1_1 = ConvUnit(base_channels * 2 + base_channels * 4,
                              base_channels * 2)
        self.upsample1_1 = nn.UpsamplingBilinear2d(scale_factor=2)
        self.op2_1 = ConvUnit(base_channels * 4 + base_channels * 8,
                              base_channels * 4)
        self.upsample2_1 = nn.UpsamplingBilinear2d(scale_factor=2)
        self.op3_1 = ConvUnit(base_channels * 8 + base_channels * 16,
                              base_channels * 8)
        self.upsample3_1 = nn.UpsamplingBilinear2d(scale_factor=2)

        self.op0_2 = ConvUnit(base_channels * 2 + base_channels * 2,
                              base_channels)
        self.upsample0_2 = nn.UpsamplingBilinear2d(scale_factor=2)
        self.op1_2 = ConvUnit(base_channels * 2 * 2 + base_channels * 4,
                              base_channels * 2)
        self.upsample1_2 = nn.UpsamplingBilinear2d(scale_factor=2)
        self.op2_2 = ConvUnit(base_channels * 4 * 2 + base_channels * 8,
                              base_channels * 4)
        self.upsample2_2 = nn.UpsamplingBilinear2d(scale_factor=2)

        self.op0_3 = ConvUnit(base_channels * 3 + base_channels * 2,
                              base_channels)
        self.upsample0_3 = nn.UpsamplingBilinear2d(scale_factor=2)
        self.op1_3 = ConvUnit(base_channels * 2 * 3 + base_channels * 4,
                              base_channels * 2)
        self.upsample1_3 = nn.UpsamplingBilinear2d(scale_factor=2)

        self.op0_4 = ConvUnit(base_channels * 4 + base_channels * 2,
                              base_channels)
        self.upsample0_4 = nn.UpsamplingBilinear2d(scale_factor=2)

        # Multi-task
        self.out_1 = nn.Conv2d(base_channels,
                               num_classes,
                               kernel_size=1,
                               stride=1,
                               padding=0)
        self.out_2 = nn.Conv2d(base_channels,
                               num_classes,
                               kernel_size=1,
                               stride=1,
                               padding=0)
        self.out_3 = nn.Conv2d(base_channels,
                               num_classes,
                               kernel_size=1,
                               stride=1,
                               padding=0)
        self.out_4 = nn.Conv2d(base_channels,
                               num_classes,
                               kernel_size=1,
                               stride=1,
                               padding=0)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                base.init_weights(m, init_type='kaiming')
            elif isinstance(m, nn.BatchNorm2d):
                base.init_weights(m, init_type='kaiming')
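The channel counts above follow the UNet++ nested-skip rule: node X(i,j) concatenates the j earlier outputs at depth i (each base*2**i channels) with the 2x-upsampled output from depth i+1 (base*2**(i+1) channels). A sketch of node X(0,2), with a plain size-preserving Conv2d standing in for ConvUnit:

import torch
import torch.nn as nn

base = 64
x0_0 = torch.randn(1, base, 64, 64)       # earlier outputs at depth 0
x0_1 = torch.randn(1, base, 64, 64)
x1_1 = torch.randn(1, base * 2, 32, 32)   # output from depth 1

up = nn.UpsamplingBilinear2d(scale_factor=2)
cat = torch.cat([x0_0, x0_1, up(x1_1)], dim=1)          # base*2 + base*2 = 256 channels
op0_2 = nn.Conv2d(base * 2 + base * 2, base, kernel_size=3, padding=1)
print(op0_2(cat).shape)  # torch.Size([1, 64, 64, 64]), matching op0_2 above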
Example #4
    def __init__(
        self,
        cfg,
        in_channels,
        num_layers=3,
        num_filters=256,
        kernel_size=3,
        output_kernel_size=1,
        rot_output_dim=3,
        mask_output_dim=1,
        freeze=False,
        num_classes=1,
        rot_class_aware=False,
        mask_class_aware=False,
        norm="BN",
        num_gn_groups=32,
    ):
        super().__init__()

        self.freeze = freeze
        self.concat = cfg.MODEL.CDPN.ROT_HEAD.ROT_CONCAT
        assert kernel_size in (2, 3, 4), "Only support kernel 2, 3 and 4"
        padding = 1
        output_padding = 0
        if kernel_size == 3:
            output_padding = 1
        elif kernel_size == 2:
            padding = 0

        assert output_kernel_size in (1, 3), "Only support kernel 1 and 3"
        if output_kernel_size == 1:
            pad = 0
        elif output_kernel_size == 3:
            pad = 1

        if self.concat:
            _, _, channels, _ = resnet_spec[cfg.MODEL.CDPN.BACKBONE.NUM_LAYERS]
            self.features = nn.ModuleList()
            self.features.append(
                nn.ConvTranspose2d(
                    in_channels,
                    num_filters,
                    kernel_size=kernel_size,
                    stride=2,
                    padding=padding,
                    output_padding=output_padding,
                    bias=False,
                )
            )
            self.features.append(get_norm(norm, num_filters, num_gn_groups=num_gn_groups))
            self.features.append(nn.ReLU(inplace=True))
            for i in range(num_layers):
                self.features.append(nn.UpsamplingBilinear2d(scale_factor=2))
                self.features.append(
                    nn.Conv2d(
                        num_filters + channels[-2 - i], num_filters, kernel_size=3, stride=1, padding=1, bias=False
                    )
                )
                self.features.append(get_norm(norm, num_filters, num_gn_groups=num_gn_groups))
                self.features.append(nn.ReLU(inplace=True))

                self.features.append(
                    nn.Conv2d(num_filters, num_filters, kernel_size=3, stride=1, padding=1, bias=False)
                )
                self.features.append(get_norm(norm, num_filters, num_gn_groups=num_gn_groups))
                self.features.append(nn.ReLU(inplace=True))
        else:
            self.features = nn.ModuleList()
            self.features.append(
                nn.ConvTranspose2d(
                    in_channels,
                    num_filters,
                    kernel_size=kernel_size,
                    stride=2,
                    padding=padding,
                    output_padding=output_padding,
                    bias=False,
                )
            )
            self.features.append(get_norm(norm, num_filters, num_gn_groups=num_gn_groups))
            self.features.append(nn.ReLU(inplace=True))
            for i in range(num_layers):
                # _in_channels = in_channels if i == 0 else num_filters
                # self.features.append(
                #    nn.ConvTranspose2d(_in_channels, num_filters, kernel_size=kernel_size, stride=2, padding=padding,
                #                       output_padding=output_padding, bias=False))
                # self.features.append(nn.BatchNorm2d(num_filters))
                # self.features.append(nn.ReLU(inplace=True))
                if i >= 1:
                    self.features.append(nn.UpsamplingBilinear2d(scale_factor=2))
                self.features.append(
                    nn.Conv2d(num_filters, num_filters, kernel_size=3, stride=1, padding=1, bias=False)
                )
                self.features.append(get_norm(norm, num_filters, num_gn_groups=num_gn_groups))
                self.features.append(nn.ReLU(inplace=True))

                self.features.append(
                    nn.Conv2d(num_filters, num_filters, kernel_size=3, stride=1, padding=1, bias=False)
                )
                self.features.append(get_norm(norm, num_filters, num_gn_groups=num_gn_groups))
                self.features.append(nn.ReLU(inplace=True))

        self.rot_output_dim = rot_output_dim
        if rot_class_aware:
            self.rot_output_dim *= num_classes
        self.mask_output_dim = mask_output_dim
        if mask_class_aware:
            self.mask_output_dim *= num_classes
        self.features.append(
            nn.Conv2d(
                num_filters,
                self.mask_output_dim + self.rot_output_dim,
                kernel_size=output_kernel_size,
                padding=pad,
                bias=True,
            )
        )

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                normal_init(m, std=0.001)
            elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
                constant_init(m, 1)
            elif isinstance(m, nn.ConvTranspose2d):
                normal_init(m, std=0.001)
Example #5
    def _preprocess(modules: List, up: bool) -> List:
        if up:
            modules.append(nn.UpsamplingBilinear2d(scale_factor=2))

        return modules
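Every snippet on this page leans on the same operator, so one quick shape check is worth pinning down: nn.UpsamplingBilinear2d(scale_factor=2) doubles H and W and leaves N and C alone. (PyTorch documents this module as equivalent to nn.Upsample(..., mode='bilinear', align_corners=True), deprecated in favour of nn.functional.interpolate.)

import torch
import torch.nn as nn

up = nn.UpsamplingBilinear2d(scale_factor=2)
x = torch.randn(1, 16, 32, 32)   # (N, C, H, W)
print(up(x).shape)               # torch.Size([1, 16, 64, 64]): H and W double, C unchanged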
Example #6
    def __init__(self, input_nc, output_nc):
        super(SEAUNet, self).__init__()

        self.down1 = nn.Sequential(
            nn.Conv2d(input_nc, 64, kernel_size=7, stride=2, padding=3),
            nn.ReLU(True), SEAB(4, 64, 64, 32))
        self.down2 = nn.Sequential(
            nn.AvgPool2d(3, 2, 1),
            SEAB(4, 64, 64, 32),
        )
        self.down3 = nn.Sequential(
            nn.AvgPool2d(3, 2, 1),
            SEAB(4, 64, 128, 32),
        )
        self.down4 = nn.Sequential(nn.AvgPool2d(3, 2, 1),
                                   SEAB(
                                       4,
                                       128,
                                       128,
                                       32,
                                   ))
        self.down5 = nn.Sequential(
            nn.AvgPool2d(3, 2, 1),
            SEAB(8, 128, 256, 32),
        )
        self.down6 = nn.Sequential(
            nn.AvgPool2d(3, 2, 1),
            SEAB(8, 256, 256, 32),
        )
        self.down7 = nn.Sequential(
            nn.AvgPool2d(3, 2, 1),
            SEAB(8, 256, 512, 32),
        )
        self.down8 = nn.Sequential(
            nn.AvgPool2d(3, 2, 1),
            SEAB(8, 512, 512, 32),
        )
        self.up8 = nn.Sequential(
            nn.AvgPool2d(2, 2),
            SEAB(8, 512, 512, 32),
            nn.UpsamplingBilinear2d(scale_factor=2),
        )
        self.up7 = nn.Sequential(
            SEAB(8, 1024, 512, 32),
            nn.UpsamplingBilinear2d(scale_factor=2),
        )
        self.up6 = nn.Sequential(
            SEAB(8, 1024, 256, 32),
            nn.UpsamplingBilinear2d(scale_factor=2),
        )
        self.up5 = nn.Sequential(
            SEAB(8, 512, 256, 32),
            nn.UpsamplingBilinear2d(scale_factor=2),
        )
        self.up4 = nn.Sequential(
            SEAB(4, 512, 128, 32),
            nn.UpsamplingBilinear2d(scale_factor=2),
        )
        self.up3 = nn.Sequential(
            SEAB(4, 256, 128, 32),
            nn.UpsamplingBilinear2d(scale_factor=2),
        )
        self.up2 = nn.Sequential(
            SEAB(4, 256, 64, 32),
            nn.UpsamplingBilinear2d(scale_factor=2),
        )
        self.up1 = nn.Sequential(
            SEAB(4, 128, 64, 32),
            nn.UpsamplingBilinear2d(scale_factor=2),
        )
        self.up0 = nn.Sequential(SEAB(4, 128, 64, 32),
                                 nn.ConvTranspose2d(64, 64, 4, 2, 1))

        self.to_rgb_Bsub = nn.Sequential(
            nn.Conv2d(32, output_nc, kernel_size=1, bias=False), nn.Tanh())
        self.to_rgb_Bout = nn.Sequential(
            nn.Conv2d(32, output_nc, kernel_size=1, bias=False), nn.Tanh())
Example #7
    def __init__(self, cfg):
        super(deeplabv3plus, self).__init__()
        self.backbone = None
        self.backbone_layers = None
        input_channel = 2048
        self.aspp = ASPP(dim_in=input_channel,
                         dim_out=cfg.MODEL_ASPP_OUTDIM,
                         rate=16 // cfg.MODEL_OUTPUT_STRIDE,
                         bn_mom=cfg.TRAIN_BN_MOM)
        self.dropout1 = nn.Dropout(0.5)
        self.upsample4 = nn.UpsamplingBilinear2d(scale_factor=4)
        self.upsample_sub = nn.UpsamplingBilinear2d(
            scale_factor=cfg.MODEL_OUTPUT_STRIDE // 4)

        indim = 256
        self.shortcut_conv = nn.Sequential(
            nn.Conv2d(indim,
                      cfg.MODEL_SHORTCUT_DIM,
                      cfg.MODEL_SHORTCUT_KERNEL,
                      1,
                      padding=cfg.MODEL_SHORTCUT_KERNEL // 2,
                      bias=True),
            SynchronizedBatchNorm2d(cfg.MODEL_SHORTCUT_DIM,
                                    momentum=cfg.TRAIN_BN_MOM),
            nn.ReLU(inplace=True),
        )
        self.cat_conv = nn.Sequential(
            nn.Conv2d(cfg.MODEL_ASPP_OUTDIM + cfg.MODEL_SHORTCUT_DIM,
                      cfg.MODEL_ASPP_OUTDIM,
                      3,
                      1,
                      padding=1,
                      bias=True),
            SynchronizedBatchNorm2d(cfg.MODEL_ASPP_OUTDIM,
                                    momentum=cfg.TRAIN_BN_MOM),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Conv2d(cfg.MODEL_ASPP_OUTDIM,
                      cfg.MODEL_ASPP_OUTDIM,
                      3,
                      1,
                      padding=1,
                      bias=True),
            SynchronizedBatchNorm2d(cfg.MODEL_ASPP_OUTDIM,
                                    momentum=cfg.TRAIN_BN_MOM),
            nn.ReLU(inplace=True),
            nn.Dropout(0.1),
        )
        self.cls_conv = nn.Conv2d(cfg.MODEL_ASPP_OUTDIM,
                                  cfg.MODEL_NUM_CLASSES,
                                  1,
                                  1,
                                  padding=0)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(m, SynchronizedBatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
        self.backbone = build_backbone(cfg.MODEL_BACKBONE,
                                       os=cfg.MODEL_OUTPUT_STRIDE)
        self.backbone_layers = self.backbone.get_layers()
Example #8
    def __init__(self,
                 input_channels=1,
                 filter_sizes=None,
                 l3_k_size=5,
                 dilations=None):
        super().__init__()

        if filter_sizes is None:
            filter_sizes = [
                16,  # First set of convs
                16,  # Second set of convs
                32,  # Dilated convs
                16,  # Transpose convs
            ]

        if dilations is None:
            dilations = [2, 4]

        self.features = nn.Sequential(
            # 4 conv layers.
            nn.Conv2d(input_channels,
                      filter_sizes[0],
                      kernel_size=11,
                      stride=1,
                      padding=5,
                      bias=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(filter_sizes[0],
                      filter_sizes[0],
                      kernel_size=5,
                      stride=1,
                      padding=2,
                      bias=True),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(filter_sizes[0],
                      filter_sizes[1],
                      kernel_size=5,
                      stride=1,
                      padding=2,
                      bias=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(filter_sizes[1],
                      filter_sizes[1],
                      kernel_size=5,
                      stride=1,
                      padding=2,
                      bias=True),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Dilated convolutions.
            nn.Conv2d(filter_sizes[1],
                      filter_sizes[2],
                      kernel_size=l3_k_size,
                      dilation=dilations[0],
                      stride=1,
                      padding=(l3_k_size // 2 * dilations[0]),
                      bias=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(filter_sizes[2],
                      filter_sizes[2],
                      kernel_size=l3_k_size,
                      dilation=dilations[1],
                      stride=1,
                      padding=(l3_k_size // 2 * dilations[1]),
                      bias=True),
            nn.ReLU(inplace=True),

            # Output layers
            nn.UpsamplingBilinear2d(scale_factor=2),
            nn.Conv2d(filter_sizes[2], filter_sizes[3], 3, padding=1),
            nn.ReLU(inplace=True),
            nn.UpsamplingBilinear2d(scale_factor=2),
            nn.Conv2d(filter_sizes[3], filter_sizes[3], 3, padding=1),
            nn.ReLU(inplace=True),
        )
        self.pos_output = nn.Conv2d(filter_sizes[3], 1, kernel_size=1)
        self.cos_output = nn.Conv2d(filter_sizes[3], 1, kernel_size=1)
        self.sin_output = nn.Conv2d(filter_sizes[3], 1, kernel_size=1)
        self.width_output = nn.Conv2d(filter_sizes[3], 1, kernel_size=1)

        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                nn.init.xavier_uniform_(m.weight, gain=1)
Example #9
    def __init__(self,
                 input_nc,
                 output_nc,
                 ngf=64,
                 use_dropout=False,
                 n_blocks=4,
                 padding_type='reflect',
                 anime=False):
        assert (n_blocks >= 0)
        super(Generator, self).__init__()
        self.n_blocks = n_blocks
        instance_norm_layer = functools.partial(nn.InstanceNorm2d,
                                                affine=True,
                                                track_running_stats=False)
        model = [
            nn.ReflectionPad2d(3),
            nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=True),
            nn.InstanceNorm2d(ngf, affine=True),
            nn.ReLU(True)
        ]

        n_downsampling = 2
        for i in range(n_downsampling):  # add downsampling layers
            mult = 2**i
            model += [
                nn.Conv2d(ngf * mult,
                          ngf * mult * 2,
                          kernel_size=3,
                          stride=2,
                          padding=1,
                          bias=True),
                nn.InstanceNorm2d(ngf * mult * 2, affine=True),
                nn.ReLU(True)
            ]

        mult = 2**n_downsampling
        for i in range(n_blocks):  # add ResNet blocks
            model += [
                ResnetBlock(ngf * mult,
                            padding_type=padding_type,
                            norm_layer=instance_norm_layer,
                            use_dropout=use_dropout,
                            use_bias=True)
            ]
        self.encoder = nn.Sequential(*model)

        # CAM
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.gmp = nn.AdaptiveMaxPool2d(1)
        self.cam_w = nn.Parameter(torch.FloatTensor(ngf * mult, 1))
        nn.init.xavier_uniform_(self.cam_w)
        self.cam_bias = nn.Parameter(torch.FloatTensor(1))
        self.cam_bias.data.fill_(0)

        self.conv1x1 = nn.Sequential(
            nn.Conv2d(2 * ngf * mult, ngf * mult, 1, 1),
            nn.ReLU(),
        )
        # MLP
        self.mlp = MLP(ngf * mult)

        adain_resblock = []
        for i in range(n_blocks):
            adain_resblock.append(ResBlockByAdaLIN(ngf * mult, anime))
        self.adain_resblocks = nn.ModuleList(adain_resblock)

        decoder = []
        for i in range(n_downsampling):
            decoder.append(nn.UpsamplingBilinear2d(scale_factor=2))
            decoder.append(nn.ReflectionPad2d(1))
            decoder.append(nn.Conv2d(ngf * mult, ngf * mult // 2, 3, 1, 0))
            decoder.append(LayerInstanceNorm(ngf * mult // 2))
            decoder.append(nn.ReLU())

            mult = mult // 2
        decoder.extend([
            nn.ReflectionPad2d(3),
            nn.Conv2d(ngf, output_nc, 7, 1),
            nn.Tanh()
        ])
        self.decoder = nn.Sequential(*decoder)
Example #10
    def __init__(self, n_channels=16, n_blocks=1):
        super(SRResNet, self).__init__()
        self.n_channels = n_channels

        self.inConv = nn.Conv2d(in_channels=3,
                                out_channels=self.n_channels,
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=True)
        self.inRelu = nn.ReLU(inplace=True)
        # image is 120*120*16

        #encoder
        self.resBlocks1 = self.make_block_layers(n_blocks, DownResidual,
                                                 self.n_channels)  #60*60*32
        self.resBlocks2 = self.make_block_layers(
            n_blocks, DownResidual, self.n_channels * 2)  #30*30*64
        self.resBlocks3 = self.make_block_layers(
            n_blocks, DownResidual, self.n_channels * 4)  #15*15*128
        self.resBlocks4 = self.make_block_layers(n_blocks, DownResidual,
                                                 self.n_channels * 8)  #8*8*256
        self.resBlocks5 = self.make_block_layers(
            n_blocks, DownResidual, self.n_channels * 16)  #4*4*512
        self.resBlocks6 = self.make_block_layers(
            n_blocks, DownResidual, self.n_channels * 32)  #2*2*1024

        # FC
        self.fc1 = nn.Linear(2 * 2 * 1024, 1 * 1 * 1024)
        #self.fc_bn1 = nn.BatchNorm2d(1*1*1024)
        self.fc_relu1 = nn.ReLU(inplace=True)

        self.fc2 = nn.Linear(1 * 1 * 1024, 4 * 4 * 512)
        #self.fc_bn1 = nn.BatchNorm2d(4*4*512)
        self.fc_relu2 = nn.ReLU(inplace=True)
        # FC

        #decoder
        self.resBlocks7 = self.make_block_layers(
            n_blocks, UpResidual, self.n_channels * 32)  # 8*8*256
        self.resBlocks8 = self.make_block_layers(
            n_blocks, UpResidual, self.n_channels * 16)  # 16*16*128
        self.resBlocks9 = self.make_block_layers(
            n_blocks, UpResidual, self.n_channels * 8)  # 32*32*64
        self.resBlocks10 = self.make_block_layers(
            n_blocks, UpResidual, self.n_channels * 4)  # 64*64*32
        self.resBlocks11 = self.make_block_layers(
            n_blocks, UpResidual, self.n_channels * 2)  # 128*128*16
        self.downsample = nn.UpsamplingBilinear2d(size=(120, 120))
        #120*120

        self.Conv1 = nn.Conv2d(in_channels=16,
                               out_channels=3,
                               kernel_size=1,
                               padding=0,
                               bias=True)
        self.bn1 = nn.BatchNorm2d(3)
        self.relu1 = nn.ReLU(inplace=True)

        self.outConv = nn.Conv2d(in_channels=3,
                                 out_channels=3,
                                 kernel_size=1,
                                 padding=0,
                                 bias=True)
        self.tan = nn.Tanh()
Example #11
    def __init__(self, mode='combined', activate=None):
        super(SegNet, self).__init__()
        _, self.data_type = get_device()
        self.mode = mode
        self.activate = activate

        self.dropout = nn.Dropout2d(0.3)
        self.maxpool2d = nn.MaxPool2d(2)
        self.upsample = nn.UpsamplingBilinear2d(scale_factor=2)

        self.conv1 = nn.Conv2d(3,
                               64,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               bias=True)
        self.conv2 = nn.Conv2d(64,
                               128,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               bias=True)
        self.conv3 = nn.Conv2d(128,
                               256,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               bias=True)
        self.conv4 = nn.Conv2d(256,
                               512,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               bias=True)

        self.deconv1 = nn.ConvTranspose2d(512,
                                          256,
                                          kernel_size=3,
                                          stride=1,
                                          padding=1,
                                          bias=True)
        self.deconv2 = nn.ConvTranspose2d(256,
                                          128,
                                          kernel_size=3,
                                          stride=1,
                                          padding=1,
                                          bias=True)
        self.deconv3 = nn.ConvTranspose2d(128,
                                          64,
                                          kernel_size=3,
                                          stride=1,
                                          padding=1,
                                          bias=True)
        self.deconv4 = nn.ConvTranspose2d(64,
                                          3,
                                          kernel_size=3,
                                          stride=1,
                                          padding=1,
                                          bias=True)
        self.batch_norm = nn.BatchNorm2d(3)

        self.threshold1 = nn.Threshold(0.25, 0)
        self.threshold2 = nn.Threshold(0.5, 0)
        self.threshold3 = nn.Threshold(0.75, 0)
        self.maxpool1d = nn.MaxPool1d(3, stride=2, return_indices=True)
Example #12
def main():
    """Create the model and start the training."""

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    h, w = map(int, args.input_size_target.split(','))
    input_size_target = (h, w)

    h, w = map(int, args.com_size.split(','))
    com_size = (h, w)

    ############################
    #validation data
    testloader = data.DataLoader(cityscapesDataSet(args.data_dir_target,
                                                   args.data_list_target_val,
                                                   crop_size=input_size,
                                                   mean=IMG_MEAN,
                                                   scale=False,
                                                   mirror=False,
                                                   set=args.set_val),
                                 batch_size=1,
                                 shuffle=False,
                                 pin_memory=True)
    with open('./dataset/cityscapes_list/info.json', 'r') as fp:
        info = json.load(fp)
    mapping = np.array(info['label2train'], dtype=int)  # np.int is removed in recent NumPy
    label_path_list = './dataset/cityscapes_list/label.txt'
    gt_imgs = open(label_path_list, 'r').read().splitlines()
    gt_imgs = [join('./data/Cityscapes/data/gtFine/val', x) for x in gt_imgs]

    interp_val = nn.UpsamplingBilinear2d(size=(com_size[1], com_size[0]))

    ############################

    cudnn.enabled = True

    # Create network
    if args.model == 'DeepLab':
        model = Res_Deeplab(num_classes=args.num_classes)
        #   if args.restore_from[:4] == 'http' :
        #       saved_state_dict = model_zoo.load_url(args.restore_from)
        #   else:
        saved_state_dict = torch.load(args.restore_from)

        #new_params = model.state_dict().copy()
        #   for i in saved_state_dict:
        #       # Scale.layer5.conv2d_list.3.weight
        #       i_parts = i.split('.')
        #       # print i_parts
        #       if not args.num_classes == 19 or not i_parts[1] == 'layer5':
        #           new_params['.'.join(i_parts[1:])] = saved_state_dict[i]
        # print i_parts
        model.load_state_dict(saved_state_dict)

    model.train()
    model.cuda(args.gpu)

    cudnn.benchmark = True

    # init D
    model_D1 = FCDiscriminator(num_classes=args.num_classes)

    model_D1.train()
    model_D1.cuda(args.gpu)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    trainloader = data.DataLoader(GTA5DataSet(args.data_dir,
                                              args.data_list,
                                              max_iters=args.num_steps *
                                              args.iter_size * args.batch_size,
                                              crop_size=input_size,
                                              scale=args.random_scale,
                                              mirror=args.random_mirror,
                                              mean=IMG_MEAN),
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  pin_memory=True)

    trainloader_iter = enumerate(trainloader)

    targetloader = data.DataLoader(cityscapesDataSet(
        args.data_dir_target,
        args.data_list_target,
        max_iters=args.num_steps * args.iter_size * args.batch_size,
        crop_size=input_size_target,
        scale=False,
        mirror=args.random_mirror,
        mean=IMG_MEAN,
        set=args.set),
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   pin_memory=True)

    targetloader_iter = enumerate(targetloader)

    # implement model.optim_parameters(args) to handle different models' lr setting

    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    optimizer.zero_grad()

    optimizer_D1 = optim.Adam(model_D1.parameters(),
                              lr=args.learning_rate_D,
                              betas=(0.9, 0.99))
    optimizer_D1.zero_grad()

    bce_loss = torch.nn.BCEWithLogitsLoss()

    interp = nn.UpsamplingBilinear2d(size=(input_size[1], input_size[0]))
    interp_target = nn.UpsamplingBilinear2d(size=(input_size_target[1],
                                                  input_size_target[0]))

    # labels for adversarial training
    source_label = 0
    target_label = 1

    for i_iter in range(args.num_steps):

        loss_seg_value1 = 0
        loss_adv_target_value1 = 0
        loss_D_value1 = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)

        optimizer_D1.zero_grad()
        adjust_learning_rate_D(optimizer_D1, i_iter)

        for sub_i in range(args.iter_size):

            # train G
            for param in model_D1.parameters():
                param.requires_grad = False

            _, batch = next(trainloader_iter)
            images_source, labels, _, _ = batch
            images_source = Variable(images_source).cuda(args.gpu)
            pred1, feature = model(images_source)
            pred1 = interp(pred1)
            loss_seg1 = loss_calc(pred1, labels, args.gpu)

            D_out1 = model_D1(feature)
            loss_D1_source = bce_loss(
                D_out1,
                Variable(
                    torch.FloatTensor(
                        D_out1.data.size()).fill_(source_label)).cuda(
                            args.gpu))

            _, batch = next(targetloader_iter)
            images_target, _, _ = batch
            images_target = Variable(images_target).cuda(args.gpu)

            _, feature_target = model(images_target)
            D_out1 = model_D1(feature_target)
            loss_D1_target = bce_loss(
                D_out1,
                Variable(
                    torch.FloatTensor(
                        D_out1.data.size()).fill_(target_label)).cuda(
                            args.gpu))

            loss = loss_seg1 + args.lambda_adv_target1 * (-loss_D1_source -
                                                          loss_D1_target)
            loss.backward()
            loss_seg_value1 += loss_seg1.data.item()
            loss_adv_target = loss_D1_source + loss_D1_target
            loss_adv_target_value1 = loss_adv_target.data.item()

            optimizer.step()

            # train D
            for param in model_D1.parameters():
                param.requires_grad = True

            pred1, feature = model(images_source)
            feature = feature.detach()
            D_out1 = model_D1(feature)
            loss_D1_source = bce_loss(
                D_out1,
                Variable(
                    torch.FloatTensor(
                        D_out1.data.size()).fill_(source_label)).cuda(
                            args.gpu))

            _, feature_target = model(images_target)
            feature_target = feature_target.detach()
            D_out1 = model_D1(feature_target)
            loss_D1_target = bce_loss(
                D_out1,
                Variable(
                    torch.FloatTensor(
                        D_out1.data.size()).fill_(target_label)).cuda(
                            args.gpu))

            loss_D1 = loss_D1_source + loss_D1_target

            loss_D1.backward()

            loss_D_value1 = loss_D1.data.item()
            optimizer_D1.step()

        print('exp = {}'.format(args.snapshot_dir))
        print(
            'iter = {0:8d}/{1:8d}, loss_seg1 = {2:.3f} loss_adv1 = {3:.3f}  loss_D1 = {4:.3f}'
            .format(i_iter, args.num_steps, loss_seg_value1,
                    loss_adv_target_value1, loss_D_value1))

        if i_iter >= args.num_steps_stop - 1:
            print('save model ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '.pth'))
            torch.save(
                model_D1.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D1.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '.pth'))
            torch.save(
                model_D1.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D1.pth'))

            hist = np.zeros((19, 19))

            f = open(args.results_dir, 'a')
            for index, batch in enumerate(testloader):
                print(index)
                image, _, name = batch
                with torch.no_grad():  # volatile=True is obsolete; no_grad is the modern equivalent
                    output1, output2 = model(Variable(image).cuda(args.gpu))
                pred = interp_val(output1)
                pred = pred[0].permute(1, 2, 0)
                #      print(pred.shape)
                pred = torch.max(pred, 2)[1].byte()
                pred = pred.data.cpu().numpy()
                label = Image.open(gt_imgs[index])
                label = np.array(label.resize(com_size, Image.NEAREST))
                label = label_mapping(label, mapping)
                #      print("fengmao,",np.max(label),np.max(pred))
                hist += fast_hist(label.flatten(), pred.flatten(), 19)

            mIoUs = per_class_iu(hist)
            mIoU = round(np.nanmean(mIoUs) * 100, 2)
            print(mIoU)
            f.write('i_iter:{:d},        miou:{:0.3f} \n'.format(i_iter, mIoU))
            f.close()
Example #13
    pytorch_list = []
    for i in img_list:
        img = np.zeros((513, 513, 3))

        img_temp = cv2.imread(os.path.join(im_path,i[:-1]+'.jpg')).astype(float)
        img_original = img_temp
        img_temp[:,:,0] = img_temp[:,:,0] - 104.008
        img_temp[:,:,1] = img_temp[:,:,1] - 116.669
        img_temp[:,:,2] = img_temp[:,:,2] - 122.675
        img[:img_temp.shape[0],:img_temp.shape[1],:] = img_temp
        gt = cv2.imread(os.path.join(gt_path,i[:-1]+'.png'),0)
        gt[gt==255] = 0
        with torch.no_grad():
            output = model(Variable(torch.from_numpy(img[np.newaxis, :].transpose(0, 3, 1, 2)).float()).cuda(gpu0))  # volatile dropped; torch.no_grad() already covers it
            interp = nn.UpsamplingBilinear2d(size=(513, 513))
            output = interp(output[3]).cpu().data[0].numpy()
        output = output[:,:img_temp.shape[0],:img_temp.shape[1]]
        
        output = output.transpose(1, 2, 0)
        output = np.argmax(output, axis=2)
        if args['--visualize']:
            plt.subplot(3, 1, 1)
            plt.imshow(img_original)
            plt.subplot(3, 1, 2)
            plt.imshow(gt)
            plt.subplot(3, 1, 3)
            plt.imshow(output)
            plt.show()

        iou_pytorch = get_iou(output, gt)
Example #14
    def __init__(self,
                 ver_dim,
                 seg_dim,
                 spherical_used=False,
                 fcdim=256,
                 s8dim=128,
                 s4dim=64,
                 s2dim=32,
                 raw_dim=32):
        super(Resnet18, self).__init__()

        # Load the pretrained weights, remove avg pool
        # layer and get the output stride of 8
        resnet18_8s = resnet18(fully_conv=True,
                               pretrained=True,
                               output_stride=8,
                               remove_avg_pool_layer=True)

        self.ver_dim = ver_dim
        self.seg_dim = seg_dim
        self.spherical_used = spherical_used

        # Randomly initialize the 1x1 Conv scoring layer
        resnet18_8s.fc = nn.Sequential(
            nn.Conv2d(resnet18_8s.inplanes, fcdim, 3, 1, 1, bias=False),
            nn.BatchNorm2d(fcdim), nn.ReLU(True))
        self.resnet18_8s = resnet18_8s

        # x8s->128
        self.conv8s = nn.Sequential(
            nn.Conv2d(128 + fcdim, s8dim, 3, 1, 1, bias=False),
            nn.BatchNorm2d(s8dim), nn.LeakyReLU(0.1, True))
        self.up8sto4s = nn.UpsamplingBilinear2d(scale_factor=2)
        # x4s->64
        self.conv4s = nn.Sequential(
            nn.Conv2d(64 + s8dim, s4dim, 3, 1, 1, bias=False),
            nn.BatchNorm2d(s4dim), nn.LeakyReLU(0.1, True))

        # x2s->64
        self.conv2s = nn.Sequential(
            nn.Conv2d(64 + s4dim, s2dim, 3, 1, 1, bias=False),
            nn.BatchNorm2d(s2dim), nn.LeakyReLU(0.1, True))
        self.up4sto2s = nn.UpsamplingBilinear2d(scale_factor=2)

        self.convraw = nn.Sequential(
            nn.Conv2d(3 + s2dim, raw_dim, 3, 1, 1, bias=False),
            nn.BatchNorm2d(raw_dim), nn.LeakyReLU(0.1, True),
            nn.Conv2d(raw_dim, seg_dim + ver_dim, 1, 1))
        if self.spherical_used:
            self.convsignX = nn.Sequential(
                nn.Conv2d(3 + s2dim, raw_dim, 3, 1, 1, bias=False),
                nn.BatchNorm2d(raw_dim),
                nn.LeakyReLU(0.1, True),
                nn.Conv2d(raw_dim, ver_dim, 1, 1)  # N * 4; the label has 4 dims
            )
            self.convsignY = nn.Sequential(
                nn.Conv2d(3 + s2dim, raw_dim, 3, 1, 1, bias=False),
                nn.BatchNorm2d(raw_dim),
                nn.LeakyReLU(0.1, True),
                nn.Conv2d(raw_dim, ver_dim, 1, 1)  # N * 4; the label has 4 dims
            )
        self.up2storaw = nn.UpsamplingBilinear2d(scale_factor=2)
Example #15
    def __init__(self, nz=100, ngf=160, nc=3, init_type="normal"):
        super(_Generator_ResizeConv, self).__init__()
        self.init_type = init_type
        self.normfunc = nn.BatchNorm2d
        self.model = nn.Sequential(
            OrderedDict([
                # ----- layer 1
                ("unsample1", nn.UpsamplingBilinear2d(scale_factor=2)),
                ("conv1",
                 nn.Conv2d(in_channels=nz,
                           out_channels=ngf * 16,
                           kernel_size=3,
                           stride=1,
                           padding=1,
                           bias=False)),
                ("bn1", self.normfunc(num_features=ngf * 16)),
                ("relu1", nn.ReLU(inplace=True)),

                # ----- layer 2
                ("unsample2", nn.UpsamplingBilinear2d(scale_factor=4)),
                ("conv2",
                 nn.Conv2d(in_channels=ngf * 16,
                           out_channels=ngf * 8,
                           kernel_size=3,
                           stride=1,
                           padding=1,
                           bias=False)),
                ("bn2", self.normfunc(num_features=ngf * 8)),
                ("relu2", nn.ReLU(inplace=True)),

                # ----- layer 3
                ("unsample3", nn.UpsamplingBilinear2d(scale_factor=2)),
                ("conv3",
                 nn.Conv2d(in_channels=ngf * 8,
                           out_channels=ngf * 4,
                           kernel_size=3,
                           stride=1,
                           padding=1,
                           bias=False)),
                ("bn3", self.normfunc(num_features=ngf * 4)),
                ("relu3", nn.ReLU(inplace=True)),

                # ----- layer 4
                ("unsample4", nn.UpsamplingBilinear2d(scale_factor=2)),
                ("conv4",
                 nn.Conv2d(in_channels=ngf * 4,
                           out_channels=ngf * 2,
                           kernel_size=3,
                           stride=1,
                           padding=1,
                           bias=False)),
                ("bn4", self.normfunc(num_features=ngf * 2)),
                ("relu4", nn.ReLU(inplace=True)),

                # ----- layer 5
                ("unsample5", nn.UpsamplingBilinear2d(scale_factor=2)),
                ("conv5",
                 nn.Conv2d(in_channels=ngf * 2,
                           out_channels=ngf,
                           kernel_size=3,
                           stride=1,
                           padding=1,
                           bias=False)),
                ("bn5", self.normfunc(num_features=ngf)),
                ("relu5", nn.ReLU(inplace=True)),
                ("unsample6", nn.UpsamplingBilinear2d(scale_factor=2)),
                ("conv6",
                 nn.Conv2d(in_channels=ngf,
                           out_channels=nc,
                           kernel_size=3,
                           stride=1,
                           padding=1,
                           bias=False)),
                ("tanh", nn.Tanh()),
            ]))
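
The generator above follows the resize-convolution pattern: bilinear
upsampling followed by a stride-1 convolution, a common alternative to
nn.ConvTranspose2d that avoids checkerboard artifacts. A self-contained
sketch of one such block, with hypothetical channel sizes:

import torch
import torch.nn as nn

# One resize-convolution block: upsample first, then filter at stride 1.
block = nn.Sequential(
    nn.UpsamplingBilinear2d(scale_factor=2),
    nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1, bias=False),
    nn.BatchNorm2d(32),
    nn.ReLU(inplace=True),
)
z = torch.randn(1, 64, 4, 4)
print(block(z).shape)  # torch.Size([1, 32, 8, 8])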
Example #16
 def __init__(self, in_channels, out_channels, kernel_size=3, activation=None, upsampling=1):
     conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2)
     upsampling = nn.UpsamplingBilinear2d(scale_factor=upsampling) if upsampling > 1 else nn.Identity()
     activation = Activation(activation)
     super().__init__(conv2d, upsampling, activation)
Example #17
 def forward(self, x, skip):
     upsample = nn.UpsamplingBilinear2d(size=skip.size()[2:])
     x = upsample(x)
     skip = self.conv_channel_adjust(skip)
     fused = self.conv_fusion(x + skip)
     return fused
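
Constructing the upsampler inside forward with size=skip.size()[2:]
guarantees the decoder features align with the skip connection even at odd
resolutions, where scale_factor=2 could be off by one. A standalone sketch of
the same idea (the shapes are assumptions):

import torch
import torch.nn as nn

x = torch.randn(1, 64, 7, 7)       # coarse decoder features
skip = torch.randn(1, 64, 13, 13)  # encoder skip connection, odd size

# Match x to the skip tensor's spatial size before fusing by addition.
upsample = nn.UpsamplingBilinear2d(size=skip.size()[2:])
fused = upsample(x) + skip
print(fused.shape)  # torch.Size([1, 64, 13, 13])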
Example #18
    def __init__(self, anchor_num=3, class_num=20):
        super(YoloNano, self).__init__()
        self.anchor_num = anchor_num
        self.class_num = class_num
        self.c1 = nn.Sequential(
            nn.Conv2d(3, 12, 3, padding=1),  #(12,416,416)
            nn.BatchNorm2d(12),
            nn.LeakyReLU())
        self.c2 = nn.Sequential(
            nn.Conv2d(12, 24, 3, stride=2, padding=1),  #(24,208,208)
            nn.BatchNorm2d(24),
            nn.LeakyReLU())
        self.pep1 = PEP(24, 7)  #(24,208,208)
        self.ep1 = EP(24, 70)  #(70,104,104)
        self.pep2 = PEP(70, 25)  #(70,104,104)
        self.pep3 = PEP(70, 24)  #(70,104,104)
        self.ep2 = EP(70, 150)  #(150,52,52)
        self.pep4 = PEP(150, 56)  #(150,52,52)
        self.c3 = nn.Sequential(
            nn.Conv2d(150, 150, 1, stride=1),  #(150,52,52)
            nn.BatchNorm2d(150),
            nn.LeakyReLU())
        self.fca = FCA(150, 8)
        self.pep5 = PEP(150, 73)  # (150,52,52)
        self.pep6 = PEP(150, 71)  # (150,52,52)
        self.pep7 = PEP(150, 75)  # (150,52,52)
        self.ep3 = EP(150, 325)  # (325,26,26)
        self.pep8 = PEP(325, 132)  # (325,26,26)
        self.pep9 = PEP(325, 124)  # (325,26,26)
        self.pep10 = PEP(325, 141)  # (325,26,26)
        self.pep11 = PEP(325, 140)  # (325,26,26)
        self.pep12 = PEP(325, 137)  # (325,26,26)
        self.pep13 = PEP(325, 135)  # (325,26,26)
        self.pep14 = PEP(325, 133)  # (325,26,26)
        self.pep15 = PEP(325, 140)  # (325,26,26)
        self.ep4 = EP(325, 545)  # (545,13,13)
        self.pep16 = PEP(545, 276)  # (545,13,13)
        self.c4 = nn.Sequential(
            nn.Conv2d(545, 230, 1),  #(230,13,13)
            nn.BatchNorm2d(230),
            nn.LeakyReLU())
        self.ep5 = EP(230, 489, stride=1)  # (489,13,13)
        self.pep17 = PEP(489, 213, output_channel=469)  # (469,13,13)
        self.c5 = nn.Sequential(
            nn.Conv2d(469, 189, 1),  #(189,13,13)
            nn.BatchNorm2d(189),
            nn.LeakyReLU())
        self.c6 = nn.Sequential(
            nn.Conv2d(189, 105, 1),  #(105,13,13)
            nn.BatchNorm2d(105),
            nn.LeakyReLU())
        self.upsample1 = nn.UpsamplingBilinear2d(size=(26, 26))
        self.pep18 = PEP(430, 113, output_channel=325)  # (325,26,26)
        self.pep19 = PEP(325, 113, output_channel=207)  # (207,26,26)
        self.c7 = nn.Sequential(
            nn.Conv2d(207, 98, 1),  #(98,26,26)
            nn.BatchNorm2d(98),
            nn.LeakyReLU())
        self.c8 = nn.Sequential(
            nn.Conv2d(98, 47, 1),  #(47,26,26)
            nn.BatchNorm2d(47),
            nn.LeakyReLU())
        self.upsample2 = nn.UpsamplingBilinear2d(size=(52, 52))
        self.pep20 = PEP(197, 58, output_channel=122)  # (122,52,52)
        self.pep21 = PEP(122, 52, output_channel=87)  # (87,52,52)
        self.pep22 = PEP(87, 47, output_channel=93)  # (93,52,52)
        self.c9 = nn.Conv2d(93, self.anchor_num * (5 + self.class_num),
                            1)  #(75,52,52)
        self.ep6 = EP(98, 183, stride=1)  # (183,26,26)
        self.c10 = nn.Conv2d(183, self.anchor_num * (5 + self.class_num),
                             1)  #(75,26,26)
        self.ep7 = EP(189, 462, stride=1)  # (462,13,13)
        self.c11 = nn.Conv2d(462, self.anchor_num * (5 + self.class_num),
                             1)  #(75,13,13)

        anchors52 = [[61, 9], [17, 22], [22, 50]]  # 52x52
        anchors26 = [[36, 30], [43, 65], [68, 41]]  # 26x26
        anchors13 = [[156, 134], [67, 107], [108, 63]]  # 13x13
        self.yolo52 = YOLOLayer(anchors52, class_num)
        self.yolo26 = YOLOLayer(anchors26, class_num)
        self.yolo13 = YOLOLayer(anchors13, class_num)
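
Note that upsample1 and upsample2 above use fixed target sizes (26x26 and
52x52), which pins the network to 416x416 inputs; a scale_factor-based
upsampler would keep the same 13->26 and 26->52 behaviour while remaining
input-size agnostic. A quick check under that assumption:

import torch
import torch.nn as nn

up_fixed = nn.UpsamplingBilinear2d(size=(26, 26))
up_rel = nn.UpsamplingBilinear2d(scale_factor=2)

x = torch.randn(1, 47, 13, 13)
assert up_fixed(x).shape == up_rel(x).shape  # both: (1, 47, 26, 26)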
Example #19
    def forward(self, x):
        h, w = x.shape[2:]
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        C1 = x  # C1: [-1, 64, h, w]
        x = self.pool(x)
        if self.dropout:
            x = self.drop(x)
        x = self.relu(self.conv3(x))
        x = self.relu(self.conv4(x))
        C2 = x  # C2: [-1, 128, h/2, w/2]
        x = self.pool(x)
        if self.dropout:
            x = self.drop(x)
        x = self.relu(self.conv5(x))
        x = self.relu(self.conv6(x))
        x = self.relu(self.conv7(x))
        C3 = x  # C3: [-1, 256, h/4, w/4]
        x = self.pool(x)
        if self.dropout:
            x = self.drop(x)
        x = self.relu(self.conv8(x))
        x = self.relu(self.conv9(x))
        x = self.relu(self.conv10(x))
        C4 = x  # C4: [-1, 512, h/8, w/8]
        x = self.pool(x)
        if self.dropout:
            x = self.drop(x)
        x = self.relu(self.conv11(x))
        x = self.relu(self.conv12(x))
        x = self.relu(self.conv13(x))
        if self.dropout:
            x = self.drop(x)
        C5 = x  # C5: [-1, 512, h/16, w/16]

        # C1 C2
        C1 = self.conv14(C1)
        C1 = self.relu(self.bn1(C1))
        C2 = self.conv15(C2)
        C2 = self.relu(self.bn2(C2))
        C12 = concatenate([C1, C2], 1)  # C12: [-1, 64+128, h, w]
        C12 = self.conv16(C12)
        C12 = self.relu(self.bn3(C12))  # C12: [-1, 64, h, w]

        C3_cfe = self.relu(
            self.bn4(
                concatenate([
                    self.conv17(C3),
                    self.conv18(C3),
                    self.conv19(C3),
                    self.conv20(C3)
                ], 1)))
        C4_cfe = self.relu(
            self.bn5(
                concatenate([
                    self.conv21(C4),
                    self.conv22(C4),
                    self.conv23(C4),
                    self.conv24(C4)
                ], 1)))
        C5_cfe = self.relu(
            self.bn6(
                concatenate([
                    self.conv25(C5),
                    self.conv26(C5),
                    self.conv27(C5),
                    self.conv28(C5)
                ], 1)))
        C345 = concatenate([C3_cfe, C4_cfe, C5_cfe],
                           1)  # C345: [-1, 32*4*3, h/4, w/4]

        # CA
        if self.with_CA:
            h2, w2 = C345.shape[2:]
            CA = nn.AvgPool2d((h2, w2))(C345).view(-1, 384)
            CA = self.linear1(CA)
            CA = self.relu(CA)
            CA = self.linear2(CA)
            CA = self.sigmoid(CA).view(-1, 384, 1, 1).repeat(1, 1, h2, w2)
            C345 = CA * C345
            C345 = self.conv29(C345)
            C345 = self.relu(self.bn7(C345))
            C345 = nn.UpsamplingBilinear2d(size=(h, w))(
                C345)  # C345: [-1, 64, h, w]

        if self.with_SA:
            attention1 = self.relu(self.bn8(
                self.conv30(C345)))  # [-1, 32, h, w]
            attention1 = self.relu(self.bn9(
                self.conv31(attention1)))  # [-1, 1, h, w]
            attention2 = self.relu(self.bn10(
                self.conv32(C345)))  # [-1, 32, h, w]
            attention2 = self.relu(self.bn11(
                self.conv33(attention2)))  # [-1, 1, h, w]
            SA = attention1 + attention2
            SA = self.sigmoid(SA)  # [-1, 1, h, w]
            SA = SA.repeat([1, 64, 1, 1])
            C12 = SA * C12  # [-1, 64, h, w]
        fea = torch.cat([C12, C345], 1)  # [-1, 128, h, w]
        x = self.conv34(fea)
        return x
Example #20
 def __init__(self, in_ch, out_ch):
     super(upbn, self).__init__()
     self.up = nn.UpsamplingBilinear2d(scale_factor=2)
     self.conv = double_convbn(in_ch, out_ch)
Example #21
    def __init__(self, num_classes=23, hparams=None):
        super().__init__()
        self.hparams = hparams
        #######################################################################
        #                             YOUR CODE                               #
        #######################################################################
        # filters = [64, 128, 256, 512, 1024]
        filters = [32, 64, 128, 256, 512]
        self.model = nn.Sequential(
            # block 1: 3 -> 32 channels, spatial /2 after pooling
            nn.Conv2d(3, filters[0], 3, padding=1),
            nn.BatchNorm2d(filters[0]),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            # nn.Dropout(p=0.25),

            # block 2: 32 -> 64 channels, spatial /4
            nn.Conv2d(filters[0], filters[1], 3, padding=1),
            nn.BatchNorm2d(filters[1]),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            # block 3: 64 -> 128 channels, spatial /8
            nn.Conv2d(filters[1], filters[2], 3, padding=1),
            nn.BatchNorm2d(filters[2]),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            # block 4: 128 -> 256 channels, spatial /16
            nn.Conv2d(filters[2], filters[3], 3, padding=1),
            nn.BatchNorm2d(filters[3]),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            # block 5: 256 -> 512 channels, no pooling
            nn.Conv2d(filters[3], filters[4], 3, padding=1),
            nn.BatchNorm2d(filters[4]),
            nn.ReLU(),

            # ------------ start up-sampling ------------#

            # up 1: 512 -> 256 channels, spatial /8
            nn.UpsamplingBilinear2d(scale_factor=2),
            nn.Conv2d(filters[4], filters[3], 1),
            nn.ReLU(),

            # up 2: 256 -> 128 channels, spatial /4
            nn.UpsamplingBilinear2d(scale_factor=2),
            nn.Conv2d(filters[3], filters[2], 1),
            nn.ReLU(),

            # up 3: 128 -> 64 channels, spatial /2
            nn.UpsamplingBilinear2d(scale_factor=2),
            nn.Conv2d(filters[2], filters[1], 1),
            nn.ReLU(),

            # up 4: 64 -> 32 channels, back to input resolution
            nn.UpsamplingBilinear2d(scale_factor=2),
            nn.Conv2d(filters[1], filters[0], 1),
            nn.ReLU(),
            nn.Conv2d(filters[0], num_classes, 1))
Example #22
 def __init__(self, in_ch, out_ch):
     super(uphalf, self).__init__()
     self.up = nn.UpsamplingBilinear2d(scale_factor=2)
     self.conv = half(in_ch, out_ch)
Example #23
 def __init__(self, block=Bottleneck):
     super(ResNetUpSample, self).__init__()
     self.inplanes = 64
     self.layer2 = self._make_layer(block, planes=128, blocks=3)
     self.upsample = nn.UpsamplingBilinear2d(scale_factor=2)
Example #24
def runIter(args,
            encoder,
            decoder,
            x,
            y_mask,
            sw_mask,
            crits,
            optims,
            mode='train',
            loss=None,
            prev_hidden_temporal_list=None,
            prev_mask=None,
            last_frame=False):
    """
    Runs forward a batch
    """
    mask_siou = crits
    enc_opt, dec_opt = optims
    T = args.maxseqlen
    hidden_spatial = None
    out_masks = []
    if mode == 'train':
        encoder.train(True)
        decoder.train(True)
    else:
        encoder.train(False)
        decoder.train(False)
    feats = encoder(x, nlb=args.NLB)

    hidden_temporal_list = []

    # loop over sequence length and get predictions
    for t in range(0, T):
        # prev_hidden_temporal_list holds the hidden state of every instance
        # from the previous time instant. For the first frame of the sequence,
        # hidden_temporal is initialized to None; otherwise it is set to the
        # value from the previous time instant.
        if prev_hidden_temporal_list is not None:
            hidden_temporal = prev_hidden_temporal_list[t]
            if args.only_temporal:
                hidden_spatial = None
        else:
            hidden_temporal = None

        mask_lstm = []
        maxpool = nn.MaxPool2d((2, 2), ceil_mode=True)
        prev_mask_instance = prev_mask[:, t, :]
        prev_mask_instance = prev_mask_instance.view(
            prev_mask_instance.size(0), 1, x.data.size(2), -1)
        prev_mask_instance = maxpool(prev_mask_instance)
        for ii in range(len(feats)):
            prev_mask_instance = maxpool(prev_mask_instance)
            mask_lstm.append(prev_mask_instance)

        mask_lstm = list(reversed(mask_lstm))

        # The decoder receives two hidden-state variables: hidden_spatial (a
        # tuple of hidden and cell state), which refers to the previous object
        # instance at the same time instant, and hidden_temporal, which refers
        # to the same object instance at the previous time instant.
        out_mask, hidden = decoder(feats, mask_lstm, hidden_spatial,
                                   hidden_temporal)
        hidden_tmp = []
        for ss in range(len(hidden)):
            hidden_tmp.append(hidden[ss][0])
        hidden_spatial = hidden
        hidden_temporal_list.append(hidden_tmp)

        upsample_match = nn.UpsamplingBilinear2d(size=(x.size()[-2],
                                                       x.size()[-1]))
        out_mask = upsample_match(out_mask)
        out_mask = out_mask.view(out_mask.size(0), -1)

        # get predictions in list to concat later
        out_masks.append(out_mask)

    # concat all outputs into single tensor to compute the loss
    t = len(out_masks)
    out_masks = torch.cat(out_masks, 1).view(out_mask.size(0), len(out_masks),
                                             -1)

    sw_mask = Variable(torch.from_numpy(
        sw_mask.data.cpu().numpy()[:, 0:t])).contiguous().float()

    if args.use_gpu:
        sw_mask = sw_mask.cuda()
    else:
        out_masks = out_masks.contiguous()

    #loss is masked with sw_mask
    loss_mask_iou = mask_siou(y_mask.view(-1, y_mask.size()[-1]),
                              out_masks.view(-1, out_masks.size()[-1]),
                              sw_mask.view(-1, 1))
    loss_mask_iou = torch.mean(loss_mask_iou)

    # total loss is the weighted sum of all terms
    if loss is None:
        loss = args.iou_weight * loss_mask_iou
    else:
        loss += args.iou_weight * loss_mask_iou

    if last_frame:
        enc_opt.zero_grad()
        dec_opt.zero_grad()
        decoder.zero_grad()
        encoder.zero_grad()

        if mode == 'train':
            loss.backward()
            dec_opt.step()
            if args.update_encoder:
                enc_opt.step()

    #pytorch 0.4
    #losses = [loss.data[0], loss_mask_iou.data[0]]
    #pytorch 1.0
    losses = [loss.data.item(), loss_mask_iou.data.item()]

    out_masks = torch.sigmoid(out_masks)
    outs = out_masks.data

    del loss_mask_iou, feats, x, y_mask, sw_mask
    if last_frame:
        del loss
        loss = None

    return loss, losses, outs, hidden_temporal_list
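
The inner loop above builds a pyramid of progressively max-pooled masks, one
per encoder feature level, coarsest first. A standalone sketch of that
pattern (the level count here is an assumption):

import torch
import torch.nn as nn

maxpool = nn.MaxPool2d((2, 2), ceil_mode=True)
mask = torch.rand(1, 1, 128, 128)

mask = maxpool(mask)  # one extra pooling before the per-level loop
pyramid = []
for _ in range(4):    # assuming four encoder feature maps
    mask = maxpool(mask)
    pyramid.append(mask)
pyramid = list(reversed(pyramid))  # coarsest level first
print([m.shape[-1] for m in pyramid])  # [4, 8, 16, 32]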
Example #25
    def __init__(self,
                 input_nc,
                 output_nc,
                 ngf=64,
                 n_downsampling=3,
                 n_blocks=9,
                 norm_layer=nn.BatchNorm2d,
                 padding_type='reflect',
                 dilation=1,
                 interpolated_conv=False):
        assert (n_blocks >= 0)
        super(GlobalGenerator, self).__init__()
        activation = nn.ReLU(True)

        model = [
            nn.ReflectionPad2d(3),
            nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0),
            norm_layer(ngf), activation
        ]
        ### downsample
        for i in range(n_downsampling):
            mult = 2**i
            if interpolated_conv is True:
                model += [
                    nn.Conv2d(ngf * mult,
                              ngf * mult * 2,
                              kernel_size=3,
                              stride=1,
                              padding=1),
                    nn.MaxPool2d(2),
                    norm_layer(ngf * mult * 2),
                    nn.ReLU(True),
                ]
            else:
                model += [
                    nn.Conv2d(ngf * mult,
                              ngf * mult * 2,
                              kernel_size=3,
                              stride=2,
                              padding=1),
                    norm_layer(ngf * mult * 2), activation
                ]

        ### resnet blocks
        mult = 2**n_downsampling
        for i in range(n_blocks):
            model += [
                ResnetBlock(ngf * mult,
                            padding_type=padding_type,
                            activation=activation,
                            norm_layer=norm_layer,
                            dilation=dilation)
            ]

        ### upsample
        # Harry: use interpolated convolution instead of transposed convolution
        for i in range(n_downsampling):
            mult = 2**(n_downsampling - i)
            if interpolated_conv is True:
                model += [
                    nn.UpsamplingBilinear2d(scale_factor=2),
                    nn.Conv2d(ngf * mult, int(ngf * mult / 2), 3, padding=1),
                    norm_layer(int(ngf * mult / 2)),
                    nn.ReLU(True)
                ]
            else:
                model += [
                    nn.ConvTranspose2d(ngf * mult,
                                       int(ngf * mult / 2),
                                       kernel_size=3,
                                       stride=2,
                                       padding=1,
                                       output_padding=1),
                    norm_layer(int(ngf * mult / 2)), activation
                ]

        model += [
            nn.ReflectionPad2d(3),
            nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0),
            nn.Tanh()
        ]
        self.model = nn.Sequential(*model)
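
Both upsampling branches above double the spatial resolution; the
interpolated-convolution branch simply decouples the resizing from the
learned filtering. A quick shape check with assumed sizes:

import torch
import torch.nn as nn

x = torch.randn(1, 128, 16, 16)

interp_up = nn.Sequential(
    nn.UpsamplingBilinear2d(scale_factor=2),
    nn.Conv2d(128, 64, kernel_size=3, padding=1),
)
transposed_up = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2,
                                   padding=1, output_padding=1)

print(interp_up(x).shape)      # torch.Size([1, 64, 32, 32])
print(transposed_up(x).shape)  # torch.Size([1, 64, 32, 32])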
Example #26
    def __init__(self,
                 kernel_cores=[32],
                 encoding=16,
                 input_size=[64, 64],
                 num_channels=3):
        super(DimoCAE, self).__init__()

        self.kernel_cores = kernel_cores
        self._initialize_weights()
        self.encoding = encoding

        in_channels = num_channels
        layers = []
        scaling_size = input_size[0]
        num_layers = len(kernel_cores)
        for i in range(num_layers):
            conve2d = nn.Conv2d(in_channels,
                                kernel_cores[i],
                                kernel_size=(3, 3),
                                padding=(1, 1))
            #drope2d = nn.Dropout(0.3)
            #relue2d = nn.ReLU(True)

            maxpoole2d = nn.MaxPool2d(2)  #,padding=(0,1))
            layers += [conve2d, maxpoole2d]  #, maxpoole2d]
            in_channels = kernel_cores[i]

        self.encoder = nn.Sequential(*layers)

        layers_d = []
        scaling_size = scaling_size // 2**num_layers
        scaling_size *= 2

        layers_fc, layers_dc = [], []
        dim = input_size[0] // (2**num_layers)
        self.dim = dim
        layers_fc += [
            nn.Linear(kernel_cores[-1] * dim * dim, encoding),
            nn.ReLU()
        ]
        self.fc = nn.Sequential(*layers_fc)
        layers_dc += [
            nn.Linear(encoding, kernel_cores[-1] * dim * dim),
            nn.ReLU()
        ]
        self.dc = nn.Sequential(*layers_dc)

        # possible bug, last layer does not return to Feature size, but first kernel_cores size
        for j in range(num_layers):
            upsample = nn.UpsamplingBilinear2d(scaling_size)

            if j != num_layers - 1:
                conv2d = nn.Conv2d(in_channels,
                                   kernel_cores[-j - 2],
                                   kernel_size=(3, 3),
                                   padding=(1, 1))
            else:
                conv2d = nn.Conv2d(in_channels,
                                   num_channels,
                                   kernel_size=(3, 3),
                                   padding=(1, 1))
            #drop2d = nn.Dropout(0.3)
            layers_d += [upsample, conv2d]

            if j != num_layers - 1:
                in_channels = kernel_cores[-j - 2]
            scaling_size *= 2

        self.decoder = nn.Sequential(*layers_d)
Example #27
    train_loss_all = []
    valid_loss_all = []

    for epoch in range(1000):  # loop over the dataset multiple times

        train_loss_epoch = []
        for i, data in enumerate(train_dataloader):
            # training
            images, poses = data['image'], data['pose']
            guass_heatmap = data['guass_heatmap']

            images, poses = Variable(images.cuda()), Variable(poses.cuda())
            optimizer.zero_grad()

            outputs = net(images)
            output_heatmap = nn.UpsamplingBilinear2d(
                (inputsize, inputsize))(outputs)
            guass_heatmap = Variable(guass_heatmap.cuda())

            loss = criterion(output_heatmap, guass_heatmap)
            loss.backward()
            optimizer.step()

            train_loss_epoch.append(loss.item())

        if epoch % 2 == 0:
            valid_loss_epoch = []
            for i_batch, sample_batched in enumerate(test_dataloader):
                # calculate the valid loss
                net_forward = net
                images = sample_batched['image'].cuda()
                poses = sample_batched['pose'].cuda()
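
The training step above upsamples the low-resolution network output to the
input size before comparing it against the full-resolution Gaussian heatmap
target. A minimal sketch of that loss computation (the sizes, channel count,
and MSE criterion are assumptions; guass_heatmap mirrors the snippet's
spelling):

import torch
import torch.nn as nn

inputsize = 256
outputs = torch.randn(4, 17, 64, 64)  # low-resolution heatmap predictions
guass_heatmap = torch.randn(4, 17, inputsize, inputsize)  # full-size targets

output_heatmap = nn.UpsamplingBilinear2d((inputsize, inputsize))(outputs)
loss = nn.MSELoss()(output_heatmap, guass_heatmap)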
Example #28
    def __init__(self, phase, base, extras, head, num_classes, batch_norm):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.batch_norm = batch_norm
        # TODO: implement __call__ in PriorBox
        self.priorbox = PriorBox(v2)
        self.priors = Variable(self.priorbox.forward(), volatile=True)
        self.size = 300

        # SSD network
        self.vgg = nn.ModuleList(base)

        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

        # FSSD extra layers before fusion
        self.conv81 = nn.Conv2d(1024, 256, kernel_size=1)
        self.conv81.apply(weights_init)
        self.bn_conv81 = nn.BatchNorm2d(256)
        self.conv82 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1)
        self.conv82.apply(weights_init)
        self.bn_conv82 = nn.BatchNorm2d(512)
        self.bn_fused = nn.BatchNorm2d(768)
        # FSSD extra layers after fusion
        self.conv91 = nn.Conv2d(768, 512, kernel_size=3, stride=1, padding=1)
        self.conv91.apply(weights_init)
        self.bn_conv91 = nn.BatchNorm2d(512)
        self.conv101 = nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1)
        self.conv101.apply(weights_init)
        self.bn_conv101 = nn.BatchNorm2d(512)
        self.conv111 = nn.Conv2d(512, 256, kernel_size=3, stride=2, padding=1)
        self.conv111.apply(weights_init)
        self.bn_conv111 = nn.BatchNorm2d(256)
        self.conv121 = nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1)
        self.conv121.apply(weights_init)
        self.bn_conv121 = nn.BatchNorm2d(256)
        self.conv131 = nn.Conv2d(256, 256, kernel_size=3, stride=1)
        self.conv131.apply(weights_init)
        self.bn_conv131 = nn.BatchNorm2d(256)
        self.conv141 = nn.Conv2d(256, 256, kernel_size=3, stride=1)
        self.conv141.apply(weights_init)
        self.bn_conv141 = nn.BatchNorm2d(256)

        # FSSD fuse layers
        self.fuse_conv43 = nn.Conv2d(512, 256, kernel_size=1)
        self.fuse_conv43.apply(weights_init)
        self.fuse_fc7 = nn.Conv2d(1024, 256, kernel_size=1)
        self.fuse_fc7.apply(weights_init)
        self.fuse_fc7_bilinear = nn.UpsamplingBilinear2d(size=(38, 38))
        self.fuse_conv82 = nn.Conv2d(512, 256, kernel_size=1)
        self.fuse_conv82.apply(weights_init)
        self.fuse_conv82_bilinear = nn.UpsamplingBilinear2d(size=(38, 38))
Example #29
import os

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
from PIL import Image
import matplotlib.pyplot as plt
import scipy.misc

database = Database("data/DAVIS/", "data/DAVIS/ImageSets/480p/val.txt")

deep_lab = Res_Deeplab()
deep_lab.load_state_dict(
    torch.load("data/models/MS_DeepLab_resnet_pretrained_COCO_init.pth"))
logsoftmax = nn.LogSoftmax()

while database.has_next():
    images, targets, name = database.get_test()
    image = Variable(torch.from_numpy(images[1]).float(), volatile=True)
    rescale = nn.UpsamplingBilinear2d(size=(images[0].shape[2],
                                            images[0].shape[3]))

    appearance = deep_lab(image)[3]
    appearance = rescale(appearance)
    appearance = appearance.data.numpy()[0]
    print(np.amin(appearance))
    print(np.amax(appearance))
    print(appearance.shape)
    slices = np.split(appearance, appearance.shape[0], axis=0)
    print(slices[0].shape)
    overlay_color = [255, 0, 0]
    transparency = 0.999
    maxval = np.amax(appearance)
    slices = sorted(slices, key=lambda x: np.amax(x))
    os.makedirs(os.path.join("deeplabvis", name))
    for i in range(len(slices)):
Example #30
    def __init__(self, load_weights=True):
        super(net10_local, self).__init__()
        self.frontend_feat = [
            64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512
        ]
        self.frontend = make_layers(self.frontend_feat, dilation=False)
        # self.build_feature_layer = nn.Sequential(nn.Conv2d(512, 512, kernel_size=1, padding=0))   # this extra layer would re-process the features extracted by the frozen vgg16 so they fuse more easily with the auxiliary mask

        self.auxiliary_backend_feat = ['M', 512, 512, 512, 'M', 512, 256]
        self.auxiliary_backend = make_layers(self.auxiliary_backend_feat,
                                             in_channels=512,
                                             dilation=False)

        self.auxiliary_upsample1 = nn.Sequential(
            nn.UpsamplingBilinear2d(size=(64, 64)),
            nn.Conv2d(256, 256, kernel_size=1, padding=0),
            nn.ReLU(inplace=True))
        self.auxiliary_upsample2 = nn.Sequential(
            nn.UpsamplingBilinear2d(size=(128, 128)),
            nn.Conv2d(256, 256, kernel_size=1, padding=0),
            nn.ReLU(inplace=True))

        self.auxiliary_backend_output_layer = nn.Conv2d(256, 1, kernel_size=1)

        self.trunk_backend1 = nn.Sequential(
            nn.Conv2d(768, 512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(inplace=True))
        self.trunk_backend2_1 = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(inplace=True))
        self.trunk_backend2_2 = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, padding=2, stride=1,
                      dilation=2), nn.ReLU(inplace=True))
        self.trunk_backend3_1 = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(inplace=True))
        self.trunk_backend3_2 = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, padding=2, stride=1,
                      dilation=2), nn.ReLU(inplace=True))
        self.trunk_backend3_3 = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, padding=4, stride=1,
                      dilation=4), nn.ReLU(inplace=True))
        self.trunk_backend4_1 = nn.Sequential(
            nn.Conv2d(768, 256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(inplace=True))
        self.trunk_backend4_2 = nn.Sequential(
            nn.Conv2d(768, 256, kernel_size=3, padding=2, stride=1,
                      dilation=2), nn.ReLU(inplace=True))
        self.trunk_backend5 = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(inplace=True))

        self.density_map_layer = nn.Conv2d(256, 1, kernel_size=1)

        if load_weights:
            mod = models.vgg16(pretrained=True)
            self._initialize_weights()
            for i in range(len(self.frontend.state_dict().items())):
                list(self.frontend.state_dict().items())[i][1].data[:] = list(
                    mod.state_dict().items())[i][1].data[:]
            for k in self.frontend.children():
                for param in k.parameters():
                    param.requires_grad = False
        else:
            self._initialize_weights()
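
Copying the pretrained VGG16 weights entry by entry and then zeroing
requires_grad, as above, freezes the frontend. The same freeze can be
expressed directly on any pretrained feature extractor; a minimal sketch:

import torchvision.models as models

vgg = models.vgg16(pretrained=True)

# Freeze every parameter of the pretrained feature extractor.
for param in vgg.features.parameters():
    param.requires_grad = False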