def predict(self, img, scale, filp=None):
    """Detect objects in a single preprocessed image.

    Args:
        img: image batch of shape (1, C, H, W); only batch size 1 is supported.
        scale: preprocessing scale factor, forwarded to the RPN and used by
            ``bbox_inverse`` to map RoIs back to original-image coordinates.
        filp: two flags ``[y_flip, x_flip]`` describing the flip augmentation
            applied to ``img`` (undone via ``dataset_utils.bbox_inverse``).
            Defaults to no flip.  (Parameter name is a pre-existing
            misspelling of "flip"; kept for interface compatibility.)

    Returns:
        (cls_bbox, label): numpy arrays with the kept bounding boxes and
        their class indices, background detections (class 0) removed.
    """
    # Fix: the original default was the mutable list [False, False]; a None
    # sentinel avoids a shared mutable default while keeping the same
    # effective value for callers that omit the argument.
    if filp is None:
        filp = [False, False]
    self.extractor.eval()
    self.rpn.eval()
    self.roi_head.eval()
    n = img.shape[0]
    if n != 1:
        raise ValueError('Currently only batch size 1 is supported.')
    _, _, H, W = img.shape
    img_size = (H, W)
    # ------------------ inference -------------------#
    with torch.no_grad():
        scale = utils.totensor(scale)
        img = utils.totensor(img)
        features = self.extractor(img)
        rpn_loc, rpn_score, roi, _ = self.rpn(features, img_size, scale,
                                              training=False)
        roi_cls_loc, roi_cls_score = self.roi_head(features, roi)
        n_roi = roi.shape[0]
        # One loc prediction per class: (n_roi, n_class, 4).
        roi_cls_loc = roi_cls_loc.view(n_roi, self.n_class, 4)
        # Undo flip/scale augmentation so RoIs are in original-image coords.
        roi = utils.totensor(
            dataset_utils.bbox_inverse(roi, (H, W), filp, scale))
        prob = F.softmax(roi_cls_score, dim=1)  # shape: (n_roi, n_class)
        label = torch.max(prob, dim=1)[1]  # predicted class per RoI, (n_roi,)
        index = utils.totensor(np.arange(0, n_roi)).long()
        # Keep only the loc prediction belonging to the predicted class.
        roi_cls_loc = roi_cls_loc[index, label, :]
        label = utils.tonumpy(label).astype(int)
        cls_bbox = utils.loc2bbox(utils.tonumpy(roi),
                                  utils.tonumpy(roi_cls_loc))
        # Clip bounding boxes to the image (even rows/cols are y, odd are x).
        # NOTE(review): boxes were mapped back to original-image coordinates
        # above but are clipped against the network input size (H, W) —
        # verify the scale handling is intended.
        cls_bbox[:, 0::2] = np.clip(cls_bbox[:, 0::2], 0, H)
        cls_bbox[:, 1::2] = np.clip(cls_bbox[:, 1::2], 0, W)
        # Drop RoIs classified as background (class 0); despite its name,
        # this mask selects the foreground detections.
        background_mask = np.where(label != 0)[0]
        cls_bbox = cls_bbox[background_mask]
        label = label[background_mask]
    return cls_bbox, label
def forward(self, x, rois=None, im_indices=None, k=0, in_layer='conv1',
            out_layer='capsule', fea_view=False):
    """Run the network from ``in_layer`` to ``out_layer``.

    Args:
        x: input tensor fed to the first executed layer.
        rois: optional RoI coordinates; when given, RoI pooling is applied
            after the 'conv3' layer.
        im_indices: per-RoI batch indices, combined with ``rois`` into the
            (batch_index, coords...) rows the RoI layer expects.
        k: branch index used when ``self.all_fc`` is False.
        in_layer / out_layer: names of the first / last layer to execute.
        fea_view: when returning at 'conv3', flatten the feature map to
            (len(im_indices), -1) first.

    Returns:
        Either the intermediate tensor at ``out_layer``, or the tuple
        ``(x, conv3_fea)`` when execution runs past all named layers.
    """
    if rois is not None:
        im_indices = totensor(im_indices).float()
        rois = totensor(rois).float()
        # RoI layer expects rows of (batch_index, coords...).
        indices_and_rois = torch.cat([im_indices[:, None], rois], dim=1)
    conv3_fea = []
    run = False
    for name, module in self.layers.named_children():
        if name == in_layer:
            run = True
        if run:
            x = module(x)
            if name == 'conv3':
                conv3_fea = x
                # NOTE(review): reaching conv3 without ``rois`` would raise
                # NameError on ``indices_and_rois`` — callers appear to
                # always pass rois when this layer executes; confirm.
                x = self.roi(x, indices_and_rois)  # [len(im_indices),512,7,7]
        if name == out_layer:
            # Fix: was ``fea_view & (name == 'conv3')`` — bitwise ``&``;
            # logical ``and`` gives the same result for bools but
            # short-circuits and tolerates non-bool truthy flags.
            if fea_view and name == 'conv3':
                x = x.view(len(im_indices), -1)
            return x
    if not self.all_fc:
        x = self.primary(x)
        cap_out = self.branches[k](x)
    else:
        x = x.view(x.shape[0], -1)
        cap_out = self.cap_fc(x).unsqueeze(2).unsqueeze(3)  # [batch_size, 2, 1, 1]
    if out_layer == 'capsule':
        # Capsule output length = L2 norm over the capsule dimension.
        x = torch.sqrt((cap_out**2).sum(dim=2)).view(-1, self.num_predictions)  # [len(im_indices),2]
    return x, conv3_fea
def train_roi_head(self, roi, features, bbox, label):
    """One optimisation step of the RoI head (second-stage Fast R-CNN).

    Samples positive/negative RoIs against the ground truth, runs the RoI
    head on the sample, and updates ``self.roi_optimizer`` with the summed
    localisation + classification loss.

    Args:
        roi: candidate regions produced by the RPN.
        features: backbone feature map the RoI head pools from.
        bbox / label: ground-truth boxes and class labels (tensors).

    Returns:
        (roi_loc_loss, roi_cls_loss): the two loss terms of this step.
    """
    # ------------------ RoI labelling -------------------#
    # Sample a fixed number of positive/negative RoIs for the RoI head.
    # It is fine to break the computation graph of the RoIs here — they
    # are treated as constant inputs.
    # gt_roi_loc: regression targets (second location refinement).
    # gt_roi_label: N+1 classes — one extra background class.
    sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
        roi,
        utils.tonumpy(bbox),
        utils.tonumpy(label),
    )
    # ------------------ RoI prediction -------------------#
    # Only the sampled RoIs need predictions, so the head runs after
    # sampling: per-RoI class scores and per-class location offsets.
    roi_cls_loc, roi_cls_score = self.roi_head(features, sample_roi)
    n_sample = roi_cls_loc.shape[0]
    roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)  # [n_sample, n_class+1, 4]
    # roi_cls_loc predicts coordinates for every class, but the loss only
    # uses the prediction for each RoI's ground-truth class (roi_loc).
    gt_roi_loc = utils.totensor(gt_roi_loc)
    gt_roi_label = utils.totensor(gt_roi_label).long()
    index = utils.totensor(np.arange(0, n_sample)).long()
    roi_loc = roi_cls_loc[index, gt_roi_label].contiguous()  # [n_sample, 4]
    # Location regression loss.
    roi_loc_loss = self._roi_loc_loss(roi_loc, gt_roi_loc)
    # Classification loss over all n_class+1 classes.
    roi_cls_loss = F.cross_entropy(roi_cls_score, gt_roi_label)
    roi_loss = roi_loc_loss + roi_cls_loss
    self.roi_optimizer.zero_grad()
    roi_loss.backward()
    self.roi_optimizer.step()
    return roi_loc_loss, roi_cls_loss
def __getitem__(self, index):
    """Load one sample: the GT frame plus its LQ neighbour frames, as tensors."""
    info = self.data_info
    fetch = self.file_client.get
    # Decode the ground-truth frame.
    gt_frame = _bytes2img(fetch(info['gt_path'][index]))  # (H W [BGR])
    # Decode every low-quality neighbour frame.
    frames = [_bytes2img(fetch(p)) for p in info['lq_path'][index]]  # (H W [BGR]) each
    # Append GT so all frames go through one tensor-conversion pass together.
    frames.append(gt_frame)
    tensors = totensor(frames)
    return {
        'lq': torch.stack(tensors[:-1], dim=0),  # (T [RGB] H W)
        'gt': tensors[-1],  # ([RGB] H W)
    }
def _roi_loc_loss(
        self,
        pred_loc,
        gt_loc,
):
    """Smooth-L1 localisation loss for the RoI head, averaged over samples.

    Args:
        pred_loc: predicted location offsets, shape (n_sample, 4).
        gt_loc: ground-truth location offsets, same shape.

    Returns:
        Scalar loss tensor (mean over the sampled RoIs).
    """
    # All samples are weighted equally here (cf. _rpn_loc_loss, which
    # zeroes out non-positive anchors).
    in_weight = torch.ones(gt_loc.shape)
    in_weight = utils.totensor(in_weight)
    loc_loss = self._smooth_l1_loss(pred_loc, gt_loc, in_weight)
    # Fix: guard the divisor so an empty sample set cannot divide by zero;
    # identical to dividing by gt_loc.shape[0] for any non-empty batch
    # (matches the nan-guard style of _rpn_loc_loss).
    loc_loss /= max(gt_loc.shape[0], 1)
    return loc_loss
def __getitem__(self, index):
    """Load one training sample: cropped+augmented GT frame and LQ neighbours."""
    # Lazily create the file client (per worker process).
    # NOTE(review): .pop('type') mutates io_opts_dict — safe only because
    # this branch runs once per instance; confirm the dict is not reused.
    if self.file_client is None:
        self.file_client = FileClient(
            self.io_opts_dict.pop('type'), **self.io_opts_dict
        )

    # random reverse
    # NOTE(review): this reverses self.neighbor_list IN PLACE, so the
    # temporal order persists across subsequent samples — confirm intended.
    if self.opts_dict['random_reverse'] and random.random() < 0.5:
        self.neighbor_list.reverse()

    # ==========
    # get frames
    # ==========

    # get the GT frame (im4.png)
    gt_size = self.opts_dict['gt_size']
    key = self.keys[index]
    clip, seq, _ = key.split('/')  # key example: 00001/0001/im1.png

    img_gt_path = key
    img_bytes = self.file_client.get(img_gt_path, 'gt')
    img_gt = _bytes2img(img_bytes)  # (H W 1)

    # get the neighboring LQ frames
    img_lqs = []
    for neighbor in self.neighbor_list:
        img_lq_path = f'{clip}/{seq}/im{neighbor}.png'
        img_bytes = self.file_client.get(img_lq_path, 'lq')
        img_lq = _bytes2img(img_bytes)  # (H W 1)
        img_lqs.append(img_lq)

    # ==========
    # data augmentation
    # ==========

    # randomly crop GT and LQ frames at the same position
    img_gt, img_lqs = paired_random_crop(
        img_gt, img_lqs, gt_size, img_gt_path
    )

    # flip, rotate
    img_lqs.append(img_gt)  # gt joint augmentation with lq
    img_results = augment(
        img_lqs, self.opts_dict['use_flip'], self.opts_dict['use_rot']
    )

    # to tensor
    img_results = totensor(img_results)
    img_lqs = torch.stack(img_results[0:-1], dim=0)
    img_gt = img_results[-1]

    return {
        'lq': img_lqs,  # (T [RGB] H W)
        'gt': img_gt,  # ([RGB] H W)
    }
def _rpn_cls_loss(self, pred_label, gt_label):
    """Binary classification loss for RPN anchors.

    Anchors labelled -1 (ignored/invalid) are masked out via the BCE
    ``weight`` argument; the summed loss is normalised by the number of
    non-ignored anchors.

    Args:
        pred_label: predicted objectness probabilities per anchor.
        gt_label: anchor labels — 1 positive, 0 negative, -1 ignored.

    Returns:
        Scalar loss tensor (mean over non-ignored anchors).
    """
    # Mask: contribute only anchors with label >= 0 (positives + negatives).
    # NOTE(review): this is a bool tensor used as the BCE ``weight`` —
    # presumably utils.totensor / broadcasting handles the dtype; confirm.
    in_weight = (gt_label >= 0)
    in_weight = utils.totensor(in_weight)
    cls_loss = F.binary_cross_entropy(pred_label, gt_label.float(), in_weight, reduction='sum')
    # Average over the anchors that actually participated.
    cls_loss /= (gt_label >= 0).sum().float()
    return cls_loss
def _rpn_loc_loss(self, pred_loc, gt_loc, gt_label):
    """Smooth-L1 localisation loss over positive anchors only.

    Negative and ignored anchors get zero weight, so no separate
    inside/outside weights are needed — everything is derived from
    ``gt_label``.
    """
    positive = gt_label > 0
    # Zero weight everywhere except the positive anchors.
    weight = torch.zeros(gt_loc.shape)
    weight[positive] = 1
    weight = utils.totensor(weight)
    total = self._smooth_l1_loss(pred_loc, gt_loc, weight)
    # Mean over positives; the +1 keeps the division finite (no nan)
    # when a batch happens to contain no positive anchors.
    return total / (positive.sum().float() + 1)
def predict(self, img, scale):
    """Detect objects in a single preprocessed image.

    Args:
        img: image batch of shape (1, C, H, W); only batch size 1 is
            supported.
        scale: preprocessing scale factor; RoIs are divided by it to map
            them back towards original-image coordinates.

    Returns:
        (bbox, label): numpy arrays with the kept bounding boxes and their
        class indices, background detections (class 0) removed.
    """
    n = img.shape[0]
    if n != 1:
        raise ValueError('Currently only batch size 1 is supported.')
    _, _, H, W = img.shape
    img_size = (H, W)
    # ------------------ inference -------------------#
    with torch.no_grad():
        scale = utils.totensor(scale)
        img = utils.totensor(img)
        features = self.extractor(img)
        rpn_loc, rpn_score, roi, _ = self.rpn(features, img_size, scale)
        roi_cls_loc, roi_cls_score = self.roi_head(features, roi)
        n_roi = roi.shape[0]
        roi_cls_score = roi_cls_score.data
        # One loc prediction per class: (n_roi, n_class, 4).
        roi_cls_loc = roi_cls_loc.data.view(n_roi, self.n_class, 4)
        # Map RoIs back by undoing the preprocessing scale.
        roi = utils.totensor(roi) / scale
        # Undo the target normalisation applied during training.
        mean = utils.totensor(self.loc_normalize_mean)
        std = utils.totensor(self.loc_normalize_std)
        roi_cls_loc = (roi_cls_loc * std + mean)
        # Broadcast each RoI against its per-class offsets.
        roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
        cls_bbox = utils.loc2bbox(
            utils.tonumpy(roi).reshape((-1, 4)),
            utils.tonumpy(roi_cls_loc).reshape((-1, 4)))
        cls_bbox = utils.totensor(cls_bbox)
        cls_bbox = cls_bbox.view(-1, self.n_class, 4)
        # clip bounding box (even coords are y in [0, H], odd are x in [0, W])
        cls_bbox[:, :, 0::2] = (cls_bbox[:, :, 0::2]).clamp(min=0, max=H)
        cls_bbox[:, :, 1::2] = (cls_bbox[:, :, 1::2]).clamp(min=0, max=W)
        prob = F.softmax(utils.totensor(roi_cls_score), dim=1)  # shape:(n_roi,n_class)
        label = torch.max(prob, dim=1)[1].data  # predicted class, shape:(n_roi,)
        # Indices of foreground detections (class != 0); used below to drop
        # background despite the variable's name.
        mask_label = np.where(label.cpu().numpy() != 0)[0]
        # Select, per RoI, only the box of its predicted class.
        bbox = torch.gather(cls_bbox, 1,
                            label.view(-1, 1).unsqueeze(2).repeat(
                                1, 1, 4)).squeeze(1)
        # delete background
        label = label.cpu().numpy()[mask_label]
        bbox = bbox.cpu().numpy()[mask_label]
    return bbox, label
def __getitem__(self, index):
    """Return one test sample: GT frame, LQ neighbour stack, and video ids."""
    info = self.data_info
    height, width = info['h'][index], info['w'][index]

    def _load_frame(path, frame_idx):
        # Read one Y-channel frame and scale it to float32 in [0, 1], (H W 1).
        raw = import_yuv(
            seq_path=path,
            h=height,
            w=width,
            tot_frm=1,
            start_frm=frame_idx,
            only_y=True
        )
        return np.expand_dims(np.squeeze(raw), 2).astype(np.float32) / 255.

    # Ground-truth frame.
    gt_frame = _load_frame(info['gt_path'][index], info['gt_index'][index])
    # Low-quality neighbour frames (no augmentation for test data).
    frames = [
        _load_frame(info['lq_path'][index], lq_idx)
        for lq_idx in info['lq_indexes'][index]
    ]
    # Convert everything in one pass; the GT frame rides along at the end.
    frames.append(gt_frame)
    tensors = totensor(frames)
    return {
        'lq': torch.stack(tensors[:-1], dim=0),  # (T 1 H W)
        'gt': tensors[-1],  # (1 H W)
        'name_vid': info['name_vid'][index],
        'index_vid': info['index_vid'][index],
    }
def train_step(self, img, bbox, label, scale):
    """One joint optimisation step of the whole Faster R-CNN pipeline.

    Runs backbone -> RPN -> RoI head, computes the four losses, and updates
    ``self.optimizer`` once with all of them.

    Args:
        img: image batch (1, C, H, W); only batch size 1 is supported.
        bbox / label: ground-truth boxes and class labels (batch of 1).
        scale: preprocessing scale factor forwarded to the RPN.

    Returns:
        LossTuple with (rpn_loc, rpn_cls, roi_loc, roi_cls, total) losses.
    """
    n = bbox.shape[0]
    if n != 1:
        raise ValueError('Currently only batch size 1 is supported.')
    _, _, H, W = img.shape
    img_size = (H, W)
    # The extractor (first conv stages of VGG16) produces the feature map.
    img = utils.totensor(img)
    features = self.extractor(img)
    # ------------------ RPN prediction -------------------#
    # rpn_loc: per-anchor offsets, [1, 9*hh*ww, 4]
    # rpn_score: per-anchor objectness scores, [1, 9*hh*ww, 2]
    # roi: candidate regions for the RoI head (~2000 at training time)
    # anchor: all generated anchors; every anchor gets a prediction.
    # (A roi_indices output exists upstream; it is all zeros because only
    # batch size 1 is supported — it is the index within the batch.)
    rpn_loc, rpn_score, roi, anchor = self.rpn(features, img_size, scale)
    # Since batch size is one, convert variables to singular form.
    bbox = bbox[0]
    label = label[0]
    rpn_score = rpn_score[0]  # [n_anchor, 2]
    rpn_loc = rpn_loc[0]  # [n_anchor, 4]
    roi = roi
    # ------------------ RPN labelling -------------------#
    # Targets exist for every anchor, but only ~256 sampled positives and
    # negatives contribute to the loss: positive label=1, negative label=0,
    # invalid/ignored label=-1 (weighted out when computing the loss).
    gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
        utils.tonumpy(bbox), anchor, img_size)
    gt_rpn_label = utils.totensor(gt_rpn_label).long()
    gt_rpn_loc = utils.totensor(gt_rpn_loc)
    # ------------------ RPN losses -------------------#
    # Location loss: computed over positive anchors only.
    rpn_loc_loss = self._fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                            gt_rpn_label.data,
                                            self.rpn_sigma)
    # Classification loss (object vs not); label -1 samples are ignored.
    rpn_cls_loss = F.cross_entropy(rpn_score,
                                   gt_rpn_label.cuda(self.gpu),
                                   ignore_index=-1)
    # ------------------ RoI labelling -------------------#
    # Sample positive/negative RoIs for the RoI head; it is fine to break
    # the computation graph of the RoIs — treat them as constant input.
    # gt_roi_loc: second-stage regression targets.
    # gt_roi_label: N+1 classes (extra background class).
    sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
        roi, utils.tonumpy(bbox), utils.tonumpy(label),
        self.loc_normalize_mean, self.loc_normalize_std)
    # ------------------ RoI prediction -------------------#
    # Only the sampled RoIs are predicted, after labelling.
    roi_cls_loc, roi_cls_score = self.roi_head(features, sample_roi)
    n_sample = roi_cls_loc.shape[0]
    roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)  # [n_sample, n_class+1, 4]
    # roi_cls_loc predicts coordinates for every class; the loss uses only
    # each RoI's ground-truth class prediction (roi_loc).
    roi_loc = roi_cls_loc[torch.arange(0, n_sample).long().cuda(self.gpu),
                          utils.totensor(
                              gt_roi_label).long()]  # [n_sample, 4]
    gt_roi_label = utils.totensor(gt_roi_label).long()
    gt_roi_loc = utils.totensor(gt_roi_loc)
    # Location regression loss.
    roi_loc_loss = self._fast_rcnn_loc_loss(roi_loc.contiguous(),
                                            gt_roi_loc,
                                            gt_roi_label.data,
                                            self.roi_sigma)
    # Classification loss over all n_class+1 classes.
    roi_cls_loss = nn.CrossEntropyLoss()(roi_cls_score,
                                         gt_roi_label.cuda(self.gpu))
    losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
    losses = losses + [sum(losses)]
    self.optimizer.zero_grad()
    # NOTE(review): four separate backward passes with retain_graph=True
    # accumulate the same gradients as losses[-1].backward() would in a
    # single pass, at extra cost — presumably intentional; confirm.
    rpn_loc_loss.backward(retain_graph=True)
    rpn_cls_loss.backward(retain_graph=True)
    roi_loc_loss.backward(retain_graph=True)
    roi_cls_loss.backward()
    self.optimizer.step()
    return LossTuple(*losses)
# Build the model and move it to GPU when configured.
model = models.load_model(embedding_matrix=embedding, embedding_size=embedding.shape, **config)
if config['gpu'] >= 0:
    model.cuda_(config['gpu'])
model.train()
optimizer = Adam(model.parameters(), lr=config['lr'])
# Exponential LR decay: lr * lr_decay ** epoch.
scheduler = LambdaLR(optimizer, lambda epoch: config['lr_decay'] ** epoch)
# training
for epoch in range(config['epoch_num']):
    # NOTE(review): scheduler.step() at the top of the epoch advances the LR
    # before any optimizer.step(); recent PyTorch expects the scheduler step
    # after the epoch's optimisation — confirm the intended schedule.
    scheduler.step()
    train_dataset = ToxicityDataset(x_train, y_train, weights, train_idx)
    train_dataloader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, num_workers=1, collate_fn=SequenceBucketCollator())
    for batch in tqdm(train_dataloader):
        x = utils.totensor(batch[0], config['gpu']).long()
        y = utils.totensor(batch[1], config['gpu']).float()
        w = utils.totensor(batch[2], config['gpu']).float()
        pred, _ = model(x)
        # Main toxicity target is sample-weighted; auxiliary targets are not.
        loss1 = F.binary_cross_entropy(pred[:, 0], y[:, 0], w)
        loss2 = F.binary_cross_entropy(pred[:, 1:], y[:, 1:])
        # NOTE(review): `loss_sacle` looks like a typo of `loss_scale` (cf.
        # the sibling training loop); it must be defined elsewhere under this
        # exact spelling for this code to run — do not rename only here.
        loss = loss_sacle * loss1 + loss2
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Free the per-epoch dataset before rebuilding it next epoch.
    del train_dataset
    # (continuation) trailing keyword argument of the optimizer constructor
    # that begins on preceding lines outside this chunk.
    weight_decay=config['weight_decay'])
# scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda epoch: config['lr_decay'] ** epoch)
# train
model.train()
for epoch in range(config['epoch_num']):
    # scheduler.step()
    # Rebuild the dataset/loader each epoch from the in-memory arrays.
    train_dataset = TensorDataset(torch.tensor(x_train, dtype=torch.float),
                                  torch.tensor(y_train, dtype=torch.float),
                                  torch.tensor(w_train, dtype=torch.float))
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config['batch_size'], shuffle=True)
    for i, (x, y, w) in enumerate(tqdm(train_loader)):
        x = utils.totensor(x, config['gpu']).float()
        y = utils.totensor(y, config['gpu']).float()
        w = utils.totensor(w, config['gpu']).float()
        pred = model(x)
        # Main target is sample-weighted; auxiliary targets use plain BCE.
        loss1 = F.binary_cross_entropy(pred[:, 0], y[:, 0], w)
        loss2 = F.binary_cross_entropy(pred[:, 1:], y[:, 1:])
        loss = loss_scale * loss1 + loss2
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# evaluate
model.eval()
def train_extracor_and_rpn(self, img, bbox, scale, retain_graph=True):
    """One optimisation step of the backbone extractor + RPN.

    (Method name keeps the pre-existing misspelling of "extractor" —
    it is part of the public interface.)

    Args:
        img: image batch (1, C, H, W).
        bbox: ground-truth boxes (tensor).
        scale: preprocessing scale factor forwarded to the RPN.
        retain_graph: forwarded to backward(); keep True when the returned
            ``features`` are reused for a later backward pass.

    Returns:
        (features, roi, rpn_loc_loss, rpn_cls_loss): backbone features and
        RPN proposals for the downstream RoI head, plus both loss terms.
    """
    _, _, H, W = img.shape
    img_size = (H, W)
    # The extractor (first conv stages of VGG16) produces the feature map.
    img = utils.totensor(img)
    features = self.extractor(img)
    # ------------------ RPN prediction -------------------#
    # rpn_loc: per-anchor offsets, [1, 9*hh*ww, 4]
    # rpn_score: per-anchor objectness scores, [1, 9*hh*ww, 2]
    # roi: candidate regions for the RoI head (~2000 at training time)
    # anchor: all generated anchors; every anchor gets a prediction.
    rpn_loc, rpn_score, roi, anchor = self.rpn(features,
                                               img_size,
                                               scale,
                                               training=True)
    rpn_score = rpn_score[0]  # [n_anchor, 2]
    rpn_loc = rpn_loc[0]  # [n_anchor, 4]
    # ------------------ RPN labelling -------------------#
    # Targets exist for every anchor but only ~256 sampled positives and
    # negatives contribute: positive label=1, negative label=0,
    # invalid/ignored label=-1 with loc=0 (weighted out in the loss).
    gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
        utils.tonumpy(bbox), anchor, img_size)
    gt_rpn_label = utils.totensor(gt_rpn_label).long()
    gt_rpn_loc = utils.totensor(gt_rpn_loc)
    # ------------------ RPN losses -------------------#
    # Location loss: positive anchors only.
    rpn_loc_loss = self._rpn_loc_loss(rpn_loc, gt_rpn_loc,
                                      gt_rpn_label.data)
    # Binary classification loss; label=-1 samples are masked out.
    rpn_cls_loss = self._rpn_cls_loss(rpn_score, gt_rpn_label)
    rpn_loss = rpn_loc_loss + rpn_cls_loss
    self.extractor_rpn_optimizer.zero_grad()
    rpn_loss.backward(retain_graph=retain_graph)
    self.extractor_rpn_optimizer.step()
    return features, roi, rpn_loc_loss, rpn_cls_loss
def train(self):
    """Adversarial training loop for U-GAT-IT (PaddlePaddle dygraph).

    Alternates a discriminator update and a generator update for every
    batch, periodically saves qualitative A->B translation grids under
    ``Images/``, and checkpoints all six sub-networks under ``Parameters/``
    every 4 epochs.
    """
    epochs = 1000
    self.genA2B.train(), self.genB2A.train(), self.disGA.train(
    ), self.disGB.train(), self.disLA.train(), self.disLB.train()
    print('training start !')
    start_time = time.time()
    # Load pretrained parameters when resuming from epoch self.start.
    if self.pretrain:
        str_genA2B = "Parameters/genA2B%03d.pdparams" % (self.start - 1)
        str_genB2A = "Parameters/genB2A%03d.pdparams" % (self.start - 1)
        str_disGA = "Parameters/disGA%03d.pdparams" % (self.start - 1)
        str_disGB = "Parameters/disGB%03d.pdparams" % (self.start - 1)
        str_disLA = "Parameters/disLA%03d.pdparams" % (self.start - 1)
        str_disLB = "Parameters/disLB%03d.pdparams" % (self.start - 1)
        genA2B_para, gen_A2B_opt = fluid.load_dygraph(str_genA2B)
        genB2A_para, gen_B2A_opt = fluid.load_dygraph(str_genB2A)
        disGA_para, disGA_opt = fluid.load_dygraph(str_disGA)
        disGB_para, disGB_opt = fluid.load_dygraph(str_disGB)
        disLA_para, disLA_opt = fluid.load_dygraph(str_disLA)
        disLB_para, disLB_opt = fluid.load_dygraph(str_disLB)
        self.genA2B.load_dict(genA2B_para)
        self.genB2A.load_dict(genB2A_para)
        self.disGA.load_dict(disGA_para)
        self.disGB.load_dict(disGB_para)
        self.disLA.load_dict(disLA_para)
        self.disLB.load_dict(disLB_para)
    for epoch in range(self.start, epochs):
        for block_id, data in enumerate(self.train_reader()):
            real_A = np.array([x[0] for x in data], np.float32)
            real_B = np.array([x[1] for x in data], np.float32)
            real_A = totensor(real_A, block_id, 'train')
            real_B = totensor(real_B, block_id, 'train')

            # ---------------- Update D ----------------
            fake_A2B, _, _ = self.genA2B(real_A)
            fake_B2A, _, _ = self.genB2A(real_B)

            real_GA_logit, real_GA_cam_logit, _ = self.disGA(real_A)
            real_LA_logit, real_LA_cam_logit, _ = self.disLA(real_A)
            real_GB_logit, real_GB_cam_logit, _ = self.disGB(real_B)
            real_LB_logit, real_LB_cam_logit, _ = self.disLB(real_B)

            fake_GA_logit, fake_GA_cam_logit, _ = self.disGA(fake_B2A)
            fake_LA_logit, fake_LA_cam_logit, _ = self.disLA(fake_B2A)
            fake_GB_logit, fake_GB_cam_logit, _ = self.disGB(fake_A2B)
            fake_LB_logit, fake_LB_cam_logit, _ = self.disLB(fake_A2B)

            # LSGAN losses: real -> 1, fake -> 0, for both the global (G)
            # and local (L) discriminators and their CAM logits.
            D_ad_loss_GA = mse_loss(1, real_GA_logit) + mse_loss(
                0, fake_GA_logit)
            D_ad_cam_loss_GA = mse_loss(1, real_GA_cam_logit) + mse_loss(
                0, fake_GA_cam_logit)
            D_ad_loss_LA = mse_loss(1, real_LA_logit) + mse_loss(
                0, fake_LA_logit)
            D_ad_cam_loss_LA = mse_loss(1, real_LA_cam_logit) + mse_loss(
                0, fake_LA_cam_logit)
            D_ad_loss_GB = mse_loss(1, real_GB_logit) + mse_loss(
                0, fake_GB_logit)
            D_ad_cam_loss_GB = mse_loss(1, real_GB_cam_logit) + mse_loss(
                0, fake_GB_cam_logit)
            D_ad_loss_LB = mse_loss(1, real_LB_logit) + mse_loss(
                0, fake_LB_logit)
            D_ad_cam_loss_LB = mse_loss(1, real_LB_cam_logit) + mse_loss(
                0, fake_LB_cam_logit)

            D_loss_A = self.adv_weight * (D_ad_loss_GA + D_ad_cam_loss_GA +
                                          D_ad_loss_LA + D_ad_cam_loss_LA)
            D_loss_B = self.adv_weight * (D_ad_loss_GB + D_ad_cam_loss_GB +
                                          D_ad_loss_LB + D_ad_cam_loss_LB)

            Discriminator_loss = D_loss_A + D_loss_B
            Discriminator_loss.backward()
            self.D_opt.minimize(Discriminator_loss)
            self.disGA.clear_gradients(), self.disGB.clear_gradients(
            ), self.disLA.clear_gradients(), self.disLB.clear_gradients()

            # ---------------- Update G ----------------
            fake_A2B, fake_A2B_cam_logit, _ = self.genA2B(real_A)
            fake_B2A, fake_B2A_cam_logit, _ = self.genB2A(real_B)
            print("fake_A2B.shape:", fake_A2B.shape)  # debug output
            fake_A2B2A, _, _ = self.genB2A(fake_A2B)  # cycle A->B->A
            fake_B2A2B, _, _ = self.genA2B(fake_B2A)  # cycle B->A->B
            fake_A2A, fake_A2A_cam_logit, _ = self.genB2A(real_A)  # identity
            fake_B2B, fake_B2B_cam_logit, _ = self.genA2B(real_B)  # identity

            fake_GA_logit, fake_GA_cam_logit, _ = self.disGA(fake_B2A)
            fake_LA_logit, fake_LA_cam_logit, _ = self.disLA(fake_B2A)
            fake_GB_logit, fake_GB_cam_logit, _ = self.disGB(fake_A2B)
            fake_LB_logit, fake_LB_cam_logit, _ = self.disLB(fake_A2B)

            G_ad_loss_GA = mse_loss(1, fake_GA_logit)
            G_ad_cam_loss_GA = mse_loss(1, fake_GA_cam_logit)
            G_ad_loss_LA = mse_loss(1, fake_LA_logit)
            G_ad_cam_loss_LA = mse_loss(1, fake_LA_cam_logit)
            G_ad_loss_GB = mse_loss(1, fake_GB_logit)
            G_ad_cam_loss_GB = mse_loss(1, fake_GB_cam_logit)
            G_ad_loss_LB = mse_loss(1, fake_LB_logit)
            G_ad_cam_loss_LB = mse_loss(1, fake_LB_cam_logit)

            G_recon_loss_A = self.L1loss(fake_A2B2A, real_A)
            G_recon_loss_B = self.L1loss(fake_B2A2B, real_B)

            G_identity_loss_A = self.L1loss(fake_A2A, real_A)
            G_identity_loss_B = self.L1loss(fake_B2B, real_B)

            G_cam_loss_A = bce_loss(1, fake_B2A_cam_logit) + bce_loss(
                0, fake_A2A_cam_logit)
            G_cam_loss_B = bce_loss(1, fake_A2B_cam_logit) + bce_loss(
                0, fake_B2B_cam_logit)

            G_loss_A = self.adv_weight * (
                G_ad_loss_GA + G_ad_cam_loss_GA + G_ad_loss_LA + G_ad_cam_loss_LA
            ) + self.cycle_weight * G_recon_loss_A + self.identity_weight * G_identity_loss_A + self.cam_weight * G_cam_loss_A
            G_loss_B = self.adv_weight * (
                G_ad_loss_GB + G_ad_cam_loss_GB + G_ad_loss_LB + G_ad_cam_loss_LB
            ) + self.cycle_weight * G_recon_loss_B + self.identity_weight * G_identity_loss_B + self.cam_weight * G_cam_loss_B

            Generator_loss = G_loss_A + G_loss_B
            Generator_loss.backward()
            self.G_opt.minimize(Generator_loss)
            self.genA2B.clear_gradients(), self.genB2A.clear_gradients()

            print("[%5d/%5d] time: %4.4f d_loss: %.5f, g_loss: %.5f" %
                  (epoch, block_id, time.time() - start_time,
                   Discriminator_loss.numpy(), Generator_loss.numpy()))
            print("G_loss_A: %.5f G_loss_B: %.5f" %
                  (G_loss_A.numpy(), G_loss_B.numpy()))
            print("G_ad_loss_GA: %.5f G_ad_loss_GB: %.5f" %
                  (G_ad_loss_GA.numpy(), G_ad_loss_GB.numpy()))
            print("G_ad_loss_LA: %.5f G_ad_loss_LB: %.5f" %
                  (G_ad_loss_LA.numpy(), G_ad_loss_LB.numpy()))
            print("G_cam_loss_A:%.5f G_cam_loss_B:%.5f" %
                  (G_cam_loss_A.numpy(), G_cam_loss_B.numpy()))
            print("G_recon_loss_A:%.5f G_recon_loss_B:%.5f" %
                  (G_recon_loss_A.numpy(), G_recon_loss_B.numpy()))
            # Fix: the original printed G_identity_loss_B twice, so A's
            # identity loss was never reported.
            print("G_identity_loss_A:%.5f G_identity_loss_B:%.5f" %
                  (G_identity_loss_A.numpy(), G_identity_loss_B.numpy()))

            # Periodically dump a qualitative grid of A->B translations.
            if epoch % 2 == 1 and block_id % self.print_freq == 0:
                A2B = np.zeros((self.img_size * 7, 0, 3))
                # B2A = np.zeros((self.img_size * 7, 0, 3))
                for eval_id, eval_data in enumerate(self.test_reader()):
                    if eval_id == 10:
                        break
                    real_A = np.array([x[0] for x in eval_data], np.float32)
                    real_B = np.array([x[1] for x in eval_data], np.float32)
                    real_A = totensor(real_A, eval_id, 'eval')
                    real_B = totensor(real_B, eval_id, 'eval')
                    fake_A2B, _, fake_A2B_heatmap = self.genA2B(real_A)
                    fake_B2A, _, fake_B2A_heatmap = self.genB2A(real_B)
                    fake_A2B2A, _, fake_A2B2A_heatmap = self.genB2A(
                        fake_A2B)
                    fake_B2A2B, _, fake_B2A2B_heatmap = self.genA2B(
                        fake_B2A)
                    fake_A2A, _, fake_A2A_heatmap = self.genB2A(real_A)
                    fake_B2B, _, fake_B2B_heatmap = self.genA2B(real_B)
                    # One column: input, identity CAM/image, translation
                    # CAM/image, cycle CAM/image, stacked vertically.
                    a = tensor2numpy(denorm(real_A[0]))
                    b = cam(tensor2numpy(fake_A2A_heatmap[0]), self.img_size)
                    c = tensor2numpy(denorm(fake_A2A[0]))
                    d = cam(tensor2numpy(fake_A2B_heatmap[0]), self.img_size)
                    e = tensor2numpy(denorm(fake_A2B[0]))
                    f = cam(tensor2numpy(fake_A2B2A_heatmap[0]),
                            self.img_size)
                    g = tensor2numpy(denorm(fake_A2B2A[0]))
                    A2B = np.concatenate((A2B, (np.concatenate(
                        (a, b, c, d, e, f, g)) * 255).astype(np.uint8)),
                        1).astype(np.uint8)
                A2B = Image.fromarray(A2B)
                A2B.save('Images/%d_%d.png' % (epoch, block_id))
                # Restore training mode after the eval pass.
                self.genA2B.train(), self.genB2A.train(), self.disGA.train(
                ), self.disGB.train(), self.disLA.train(
                ), self.disLB.train()
        # Checkpoint all sub-networks every 4 epochs.
        if epoch % 4 == 0:
            fluid.save_dygraph(self.genA2B.state_dict(),
                               "Parameters/genA2B%03d" % (epoch))
            fluid.save_dygraph(self.genB2A.state_dict(),
                               "Parameters/genB2A%03d" % (epoch))
            fluid.save_dygraph(self.disGA.state_dict(),
                               "Parameters/disGA%03d" % (epoch))
            fluid.save_dygraph(self.disGB.state_dict(),
                               "Parameters/disGB%03d" % (epoch))
            fluid.save_dygraph(self.disLA.state_dict(),
                               "Parameters/disLA%03d" % (epoch))
            fluid.save_dygraph(self.disLB.state_dict(),
                               "Parameters/disLB%03d" % (epoch))
model.eval() # on train set train_dataset = ToxicityTestDataset(x_train, np.zeros((train_num, ))) train_dataloader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=False, num_workers=1, collate_fn=SequenceBucketCollator()) train_feature_fold = [] train_pred_fold = [] with torch.no_grad(): for x, _ in tqdm(train_dataloader): x = utils.totensor(x, config['gpu']).long() pred, feature = model(x) pred = pred[:, 0] train_feature_fold.append(feature) train_pred_fold.append(pred) train_feature_fold = torch.cat(train_feature_fold, dim=0) train_features.append(train_feature_fold.cpu().numpy()) train_pred_fold = torch.cat(train_pred_fold, dim=0) train_pred_fold = train_pred_fold.cpu().numpy() validate_pred_fold = train_pred_fold[validate_idx] train_preds.append( pd.DataFrame({ 'id': validate_idx,