Example #1
    def __init__(self, net_enc, net_dec, crit, args, deep_sup_scale=None):
        super(ETC, self).__init__()

        self.raft = RAFT()
        to_load = torch.load('./RAFT/models/raft-things.pth-no-zip')
        new_state_dict = OrderedDict()
        for k, v in to_load.items():
            name = k[7:]  # strip the `module.` prefix saved by nn.DataParallel: slicing from index 7 keeps the rest of the key
            new_state_dict[name] = v  # store each value under its renamed key
        self.raft.load_state_dict(new_state_dict)
        # ImageNet mean/std for undoing input normalization before RAFT
        self.mean = torch.FloatTensor([0.485, 0.456, 0.406])
        self.std = torch.FloatTensor([0.229, 0.224, 0.225])
        self.encoder = net_enc
        self.decoder = net_dec
        self.crit = crit
        self.deep_sup_scale = deep_sup_scale
        self.args = args
        assert (self.args.clip_num == 2 and self.args.dilation_num == 0)
        self.conv_last_ = nn.Sequential(
            nn.Conv2d(2048 + 4 * 512, 512, kernel_size=3, padding=1, bias=False),
            BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Dropout2d(0.1),
            nn.Conv2d(512, args.num_class, kernel_size=1))
        self.criterion_flow = nn.MSELoss()
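
All of these constructors repeat the same checkpoint-loading idiom: the RAFT weights were saved under nn.DataParallel, which prefixes every state-dict key with `module.`, and that prefix must be stripped before load_state_dict on a bare module. A minimal reusable sketch of the idiom (the function name and the startswith guard are ours, not the repository's):

import torch
from collections import OrderedDict

def strip_dataparallel_prefix(state_dict):
    # 'module.encoder.conv1.weight' -> 'encoder.conv1.weight'
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        new_state_dict[k[7:] if k.startswith('module.') else k] = v
    return new_state_dict
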
Example #2
    def __init__(self, net_enc, crit, args, deep_sup_scale=None):
        super(NetWarp_ocr, self).__init__()

        self.raft = RAFT()
        to_load = torch.load('./RAFT/models/raft-things.pth-no-zip')
        new_state_dict = OrderedDict()
        for k, v in to_load.items():
            name = k[7:]  # strip the `module.` prefix saved by nn.DataParallel
            new_state_dict[name] = v  # store each value under its renamed key
        self.raft.load_state_dict(new_state_dict)
        # ImageNet mean/std for undoing input normalization before RAFT
        self.mean = torch.FloatTensor([0.485, 0.456, 0.406])
        self.std = torch.FloatTensor([0.229, 0.224, 0.225])
        self.encoder = net_enc
        self.decoder = SpatialOCRNetasDec(args.num_class)
        self.head = nn.Conv2d(512, args.num_class, kernel_size=1, stride=1, padding=0, bias=True)
        self.crit = crit
        self.deep_sup_scale = deep_sup_scale
        self.args = args
        assert self.args.clip_num == 2

        self.flowcnn = FlowCNN()
        # per-channel NetWarp blend weights; the (1, 0) initialization keeps only the current frame at the start of training
        self.w0_0 = nn.Parameter(torch.FloatTensor(2048), requires_grad=True)
        self.w0_0.data.fill_(1.0)
        self.w0_1 = nn.Parameter(torch.FloatTensor(2048), requires_grad=True)
        self.w0_1.data.fill_(0.0)
        self.w1_0 = nn.Parameter(torch.FloatTensor(512), requires_grad=True)
        self.w1_0.data.fill_(1.0)
        self.w1_1 = nn.Parameter(torch.FloatTensor(512), requires_grad=True)
        self.w1_1.data.fill_(0.0)
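
The w*_0/w*_1 pairs above implement NetWarp-style per-channel blending of the current feature map with the flow-warped previous one; initializing them to 1 and 0 makes the model start out as a plain single-frame network. A sketch of the blend as it is applied in the forward passes of the later examples (the helper name is ours):

import torch
import torch.nn as nn

def blend(w_cur, w_warp, feat_cur, feat_warp):
    # w_cur, w_warp: per-channel weights of shape (C,)
    # feat_cur, feat_warp: feature maps of shape (N, C, H, W)
    return w_cur.view(1, -1, 1, 1) * feat_cur + w_warp.view(1, -1, 1, 1) * feat_warp

# with the (1, 0) initialization the warped branch contributes nothing at step 0
w_cur = nn.Parameter(torch.ones(2048))
w_warp = nn.Parameter(torch.zeros(2048))
out = blend(w_cur, w_warp, torch.randn(2, 2048, 30, 40), torch.randn(2, 2048, 30, 40))
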
Example #3
    def __init__(self, net_enc, crit, args, deep_sup_scale=None):
        super(ETC_ocr, self).__init__()

        self.raft = RAFT()
        to_load = torch.load('./RAFT/models/raft-things.pth-no-zip')
        new_state_dict = OrderedDict()
        for k, v in to_load.items():
            name = k[7:]  # strip the `module.` prefix saved by nn.DataParallel
            new_state_dict[name] = v  # store each value under its renamed key
        self.raft.load_state_dict(new_state_dict)
        # ImageNet mean/std for undoing input normalization before RAFT
        self.mean = torch.FloatTensor([0.485, 0.456, 0.406])
        self.std = torch.FloatTensor([0.229, 0.224, 0.225])
        self.encoder = net_enc
        self.decoder = SpatialOCRNetasDec(args.num_class)
        self.crit = crit
        self.deep_sup_scale = deep_sup_scale
        self.args = args
        assert (self.args.clip_num == 2 and self.args.dilation_num == 0)
        self.conv_last_ = nn.Conv2d(512, args.num_class, kernel_size=1)
        self.criterion_flow = nn.MSELoss()
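
Every constructor stores the ImageNet statistics so the forward pass can undo the dataloader's normalization before calling RAFT, which expects raw RGB in [0, 255]. The de-normalization used in the forward passes below amounts to:

import torch

mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)

def denormalize_for_raft(img):
    # inverse of transforms.Normalize(mean, std), rescaled to [0, 255]
    return (img * std.to(img.device) + mean.to(img.device)) * 255.0
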
Example #4
class NetWarp(nn.Module):
    def __init__(self, net_enc, net_dec, crit, args, deep_sup_scale=None):
        super(NetWarp, self).__init__()

        self.raft = RAFT()
        to_load = torch.load('./RAFT/models/raft-things.pth-no-zip')
        new_state_dict = OrderedDict()
        for k, v in to_load.items():
            name = k[7:]  # strip the `module.` prefix saved by nn.DataParallel
            new_state_dict[name] = v  # store each value under its renamed key
        self.raft.load_state_dict(new_state_dict)
        # ImageNet mean/std for undoing input normalization before RAFT
        self.mean = torch.FloatTensor([0.485, 0.456, 0.406])
        self.std = torch.FloatTensor([0.229, 0.224, 0.225])
        self.encoder = net_enc
        self.decoder = net_dec
        self.crit = crit
        self.deep_sup_scale = deep_sup_scale
        self.args = args
        assert self.args.clip_num == 2

        self.flowcnn = FlowCNN()
        self.conv_last_ = nn.Sequential(
            nn.Conv2d(2048 + 4 * 512, 512, kernel_size=3, padding=1, bias=False),
            BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Dropout2d(0.1),
            nn.Conv2d(512, args.num_class, kernel_size=1))
        self.w0_0 = nn.Parameter(torch.FloatTensor(2048), requires_grad=True)
        self.w0_0.data.fill_(1.0)
        self.w0_1 = nn.Parameter(torch.FloatTensor(2048), requires_grad=True)
        self.w0_1.data.fill_(0.0)
        self.w1_0 = nn.Parameter(torch.FloatTensor(4096), requires_grad=True)
        self.w1_0.data.fill_(1.0)
        self.w1_1 = nn.Parameter(torch.FloatTensor(4096), requires_grad=True)
        self.w1_1.data.fill_(0.0)

    def pixel_acc(self, pred, label):
        _, preds = torch.max(pred, dim=1)
        valid = (label >= 0).long()
        acc_sum = torch.sum(valid * (preds == label).long())
        pixel_sum = torch.sum(valid)
        acc = acc_sum.float() / (pixel_sum.float() + 1e-10)
        return acc

    def get_1x_lr_params(self):
        # encoder weights train at the base learning rate
        for key, p in self.encoder.named_parameters():
            if p.requires_grad and 'bias' not in key:
                yield p

    def get_10x_lr_params(self):
        # decoder, FlowCNN, classifier head, and blend weights train at 10x the base rate
        for module in [self.decoder, self.flowcnn, self.conv_last_]:
            for key, p in module.named_parameters():
                if p.requires_grad and 'bias' not in key:
                    yield p
        for w in [self.w0_0, self.w0_1, self.w1_0, self.w1_1]:
            yield w

    def get_1x_lr_params_bias(self):
        for key, p in self.encoder.named_parameters():
            if p.requires_grad and 'bias' in key:
                yield p

    def get_10x_lr_params_bias(self):
        for module in [self.decoder, self.flowcnn, self.conv_last_]:
            for key, p in module.named_parameters():
                if p.requires_grad and 'bias' in key:
                    yield p

    def forward(self, feed_dict, *, segSize=None):
        if feed_dict is None:
            return torch.zeros((0, self.args.num_class, 480, 720)).cuda()
        # training
        c_img = feed_dict['img_data']
        clip_imgs = feed_dict['clipimgs_data']
        label = feed_dict['seg_label']
        clip_num = len(clip_imgs)
        assert (clip_num == 1)
        n, _, h, w = label.size()
        c_pre_img = clip_imgs[0]
        # undo the dataloader normalization and rescale to [0, 255] for RAFT
        mean = self.mean.view(1, -1, 1, 1).to(c_img.device)
        std = self.std.view(1, -1, 1, 1).to(c_img.device)
        c_img_f = ((c_img * std) + mean) * 255.
        c_pre_img_f = (c_pre_img * std + mean) * 255.
        with torch.no_grad():
            self.raft.eval()
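            # RAFT stays frozen: eval mode under no_grad; test_mode returns the final upsampled flow as the second output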
            padder = InputPadder((h, w))
            c_img_f_ = padder.pad(c_img_f)
            c_pre_img_f_ = padder.pad(c_pre_img_f)
            _, flow = self.raft(c_img_f_,
                                c_pre_img_f_,
                                iters=20,
                                test_mode=True)
            flow = padder.unpad(flow)

        # refine the RAFT flow with FlowCNN, then run both frames through the encoder as one batch
        flow = self.flowcnn(c_img_f, c_pre_img_f, flow)
        x = torch.cat([c_img, c_pre_img], 0)
        clip_tmp = self.encoder(x, return_feature_maps=True)
        c_img_f1, c_pre_img_f1 = torch.split(clip_tmp[-1], clip_tmp[-1].size(0) // 2, dim=0)
        flow_1 = F.interpolate(flow, c_img_f1.size()[-2:], mode='nearest')
        c_img_f1_warp = flowwarp(c_pre_img_f1, flow_1)
        # blend current features with the warped previous-frame features (per-channel weights)
        new_c_img_f1 = self.w0_0.view(1, -1, 1, 1) * c_img_f1 + \
                       self.w0_1.view(1, -1, 1, 1) * c_img_f1_warp
        feat = torch.cat([new_c_img_f1, c_pre_img_f1], 0)
        clip_tmp[-1] = feat
        pred_deepsup_s, _, clip_tmp2 = self.decoder(clip_tmp)
        c_img_f2, c_pre_img_f2 = torch.split(clip_tmp2, clip_tmp2.size(0) // 2, dim=0)
        flow_2 = F.interpolate(flow, c_img_f2.size()[-2:], mode='nearest')
        c_img_f2_warp = flowwarp(c_pre_img_f2, flow_2)
        new_feat = self.w1_0.view(1, -1, 1, 1) * c_img_f2 + \
                   self.w1_1.view(1, -1, 1, 1) * c_img_f2_warp
        pred_ = self.conv_last_(new_feat)

        if segSize is not None:
            pred_ = nn.functional.interpolate(pred_,
                                              size=segSize,
                                              mode='bilinear',
                                              align_corners=False)
            pred_ = nn.functional.softmax(pred_, dim=1)
            return pred_
        else:
            pred_ = nn.functional.log_softmax(pred_, dim=1)
            _, _, h, w = label.size()
            label = label.squeeze(1)
            label = label.long()
            pred_ = F.interpolate(pred_, (h, w),
                                  mode='bilinear',
                                  align_corners=False)
            loss = self.crit(pred_, label)
            if self.deep_sup_scale is not None:
                pred_deepsup_s = torch.split(pred_deepsup_s,
                                             split_size_or_sections=int(
                                                 pred_deepsup_s.size(0) / 2),
                                             dim=0)
                pred_deepsup = F.interpolate(pred_deepsup_s[0], (h, w),
                                             mode='bilinear',
                                             align_corners=False)
                loss_deepsup = self.crit(pred_deepsup, label)
                loss = loss + loss_deepsup * self.deep_sup_scale
            acc = self.pixel_acc(pred_, label)
            return loss, acc
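
flowwarp is imported from elsewhere in the repository. A standard backward-warping implementation with F.grid_sample, consistent with how it is called above, looks roughly like this (a sketch assuming flow holds per-pixel (dx, dy) offsets in pixels):

import torch
import torch.nn.functional as F

def flowwarp(x, flow):
    # backward-warp x (N, C, H, W) by flow (N, 2, H, W) given in pixels
    n, _, h, w = x.size()
    ys, xs = torch.meshgrid(torch.arange(h, device=x.device),
                            torch.arange(w, device=x.device), indexing='ij')
    base = torch.stack((xs, ys), dim=0).float()       # (2, H, W), x then y
    coords = base.unsqueeze(0) + flow                 # absolute sampling coordinates
    # normalize to [-1, 1] as grid_sample expects
    gx = 2.0 * coords[:, 0] / max(w - 1, 1) - 1.0
    gy = 2.0 * coords[:, 1] / max(h - 1, 1) - 1.0
    grid = torch.stack((gx, gy), dim=-1)              # (N, H, W, 2)
    return F.grid_sample(x, grid, align_corners=True)
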
Example #5
DIR_ = '/your/path/to/VSPW_480p'

data_dir = DIR_ + '/data'
result_dir = './prediction'

split = 'val.txt'
with open(os.path.join(DIR_, split), 'r') as f:
    list_ = [line.strip() for line in f]  # one video name per line

###
gpu = 0
model_raft = RAFT()
to_load = torch.load('./RAFT_core/raft-things.pth-no-zip')
new_state_dict = OrderedDict()
for k, v in to_load.items():
    name = k[7:]  # strip the `module.` prefix saved by nn.DataParallel
    new_state_dict[name] = v  # store each value under its renamed key
model_raft.load_state_dict(new_state_dict)
model_raft = model_raft.cuda(gpu)
###
total_TC = 0.
evaluator = Evaluator(num_class)
for video in list_[:100]:  # evaluate the first 100 validation videos
    if video[0] == '.':  # skip hidden entries
        continue
    imglist_ = sorted(os.listdir(os.path.join(data_dir, video, 'origin')))
    for i, img in enumerate(imglist_[:-1]):
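
The loop above (shown truncated) drives a temporal-consistency evaluation: for each consecutive frame pair, RAFT flow warps the previous frame's result onto the current one and the two label maps are compared. A hedged sketch of the per-pair step, reusing the flowwarp sketch from Example #4; the repository's Evaluator presumably accumulates class-wise IoU, whereas this sketch reports plain pixel agreement:

import torch

def tc_for_pair(model_raft, pred_prev, pred_cur, img_prev, img_cur):
    # img_*: (1, 3, H, W) float tensors in [0, 255]; H and W assumed multiples of 8
    # (otherwise pad with InputPadder as in the forward passes above)
    # pred_*: (1, C, H, W) class scores for the two frames
    with torch.no_grad():
        _, flow = model_raft(img_cur, img_prev, iters=20, test_mode=True)
    warped_prev = flowwarp(pred_prev, flow)
    agree = (pred_cur.argmax(1) == warped_prev.argmax(1)).float().mean()
    return agree.item()
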
Example #6
class ETC_ocr(nn.Module):
    def __init__(self, net_enc, crit, args, deep_sup_scale=None):
        super(ETC_ocr, self).__init__()

        self.raft = RAFT()
        to_load = torch.load('./RAFT/models/raft-things.pth-no-zip')
        new_state_dict = OrderedDict()
        for k, v in to_load.items():
            name = k[7:]  # strip the `module.` prefix saved by nn.DataParallel
            new_state_dict[name] = v  # store each value under its renamed key
        self.raft.load_state_dict(new_state_dict)
        # ImageNet mean/std for undoing input normalization before RAFT
        self.mean = torch.FloatTensor([0.485, 0.456, 0.406])
        self.std = torch.FloatTensor([0.229, 0.224, 0.225])
        self.encoder = net_enc
        self.decoder = SpatialOCRNetasDec(args.num_class)
        self.crit = crit
        self.deep_sup_scale = deep_sup_scale
        self.args = args
        assert (self.args.clip_num == 2 and self.args.dilation_num == 0)
        self.conv_last_ = nn.Conv2d(512, args.num_class, kernel_size=1)
        self.criterion_flow = nn.MSELoss()

    def pixel_acc(self, pred, label):
        _, preds = torch.max(pred, dim=1)
        valid = (label >= 0).long()
        acc_sum = torch.sum(valid * (preds == label).long())
        pixel_sum = torch.sum(valid)
        acc = acc_sum.float() / (pixel_sum.float() + 1e-10)
        return acc

    def get_1x_lr_params(self):
        # encoder weights train at the base learning rate
        for key, p in self.encoder.named_parameters():
            if p.requires_grad and 'bias' not in key:
                yield p

    def get_10x_lr_params(self):
        # decoder and classifier head train at 10x the base rate
        for module in [self.decoder, self.conv_last_]:
            for key, p in module.named_parameters():
                if p.requires_grad and 'bias' not in key:
                    yield p

    def get_1x_lr_params_bias(self):
        for key, p in self.encoder.named_parameters():
            if p.requires_grad and 'bias' in key:
                yield p

    def get_10x_lr_params_bias(self):
        for module in [self.decoder, self.conv_last_]:
            for key, p in module.named_parameters():
                if p.requires_grad and 'bias' in key:
                    yield p

    def forward(self, feed_dict, *, segSize=None):
        if feed_dict is None:
            return torch.zeros((0, self.args.num_class, 480, 720)).cuda()
        # training
        if segSize is None:
            c_img = feed_dict['img_data']
            clip_imgs = feed_dict['clipimgs_data']
            label = feed_dict['seg_label']
            clip_num = len(clip_imgs)
            assert (clip_num == 1)
            n, _, h, w = label.size()
            c_pre_img = clip_imgs[0]
            # undo the dataloader normalization and rescale to [0, 255] for RAFT
            mean = self.mean.view(1, -1, 1, 1).to(c_img.device)
            std = self.std.view(1, -1, 1, 1).to(c_img.device)
            c_img_f = ((c_img * std) + mean) * 255.
            c_pre_img_f = (c_pre_img * std + mean) * 255.
            with torch.no_grad():
                self.raft.eval()
                padder = InputPadder((h, w))
                c_img_f_ = padder.pad(c_img_f)
                c_pre_img_f_ = padder.pad(c_pre_img_f)
                _, flow = self.raft(c_img_f_,
                                    c_pre_img_f_,
                                    iters=20,
                                    test_mode=True)
                flow = padder.unpad(flow)

            # run both frames through the encoder and decoder as one batch
            x = torch.cat([c_img, c_pre_img], 0)
            clip_tmp = self.encoder(x, return_feature_maps=True)
            clip_tmp2, pred_deepsup_s = self.decoder(clip_tmp)
            c_img_f2, c_pre_img_f2 = torch.split(clip_tmp2, clip_tmp2.size(0) // 2, dim=0)
            pred_ = self.conv_last_(clip_tmp2)
            c_pred_, c_pre_pred_ = torch.split(pred_, pred_.size(0) // 2, dim=0)

            c_pred_1 = nn.functional.log_softmax(c_pred_, dim=1)
            _, _, h, w = label.size()
            label = label.squeeze(1)
            label = label.long()
            c_pred_1 = F.interpolate(c_pred_1, (h, w),
                                     mode='bilinear',
                                     align_corners=False)
            loss = self.crit(c_pred_1, label)

            # deep supervision covers both frames: stack previous-frame labels with the current label
            clip_label = feed_dict['cliplabels_data']
            clip_label.append(feed_dict['seg_label'])
            clip_label = torch.cat(clip_label, dim=0)
            clip_label = clip_label.squeeze(1).long()
            pred_deepsup_s = nn.functional.log_softmax(pred_deepsup_s, dim=1)
            pred_deepsup = F.interpolate(pred_deepsup_s, (h, w),
                                         mode='bilinear',
                                         align_corners=False)
            loss_deepsup = self.crit(pred_deepsup, clip_label)
            loss = loss + loss_deepsup * self.deep_sup_scale
            flow = F.interpolate(flow, (h, w), mode='nearest')
            c_pre_pred_ = F.interpolate(c_pre_pred_, (h, w),
                                        mode='bilinear',
                                        align_corners=False)
            c_pred_ = F.interpolate(c_pred_, (h, w),
                                    mode='bilinear',
                                    align_corners=False)
            warp_i1 = flowwarp(c_pre_img, flow)
            warp_o1 = flowwarp(c_pre_pred_, flow)

            # photometric non-occlusion mask: near 1 where warping reproduces the current frame, near 0 at occlusions
            noc_mask2 = torch.exp(-torch.abs(torch.sum(c_img - warp_i1, dim=1))).unsqueeze(1)
            ST_loss = self.args.st_weight * self.criterion_flow(
                c_pred_ * noc_mask2, warp_o1 * noc_mask2)
            loss = loss + ST_loss
            acc = self.pixel_acc(c_pred_1, label)
            return loss, acc
        else:
            c_img = feed_dict['img_data']
            c_tmp = self.encoder(c_img, return_feature_maps=True)
            c_tmp2, pred_deepsup_s = self.decoder(c_tmp)
            c_pred_ = self.conv_last_(c_tmp2)
            c_pred_ = nn.functional.interpolate(c_pred_,
                                                size=segSize,
                                                mode='bilinear',
                                                align_corners=False)
            c_pred_ = nn.functional.softmax(c_pred_, dim=1)
            return c_pred_
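
The noc_mask2/ST_loss lines above implement ETC's occlusion-aware temporal loss: a photometric non-occlusion mask exp(-|I_t - warp(I_{t-1})|) down-weights pixels where the warp is unreliable, and an MSE pulls the current prediction toward the warped previous one. The same computation as a standalone sketch (function name ours):

import torch
import torch.nn as nn

mse = nn.MSELoss()

def temporal_consistency_loss(pred_cur, pred_prev_warped, img_cur, img_prev_warped, weight):
    # mask is ~1 where the warped previous frame matches the current one, ~0 near occlusions
    noc_mask = torch.exp(-torch.abs((img_cur - img_prev_warped).sum(dim=1))).unsqueeze(1)
    return weight * mse(pred_cur * noc_mask, pred_prev_warped * noc_mask)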