def eval_dataset_cls(cfg_path, device=None):
    """Eval a dataset for classification: equivalent to load_from + val inside
    the runner, but usable for standalone dataset validation outside the runner.
    """
    # Prepare the objects needed for validation.
    cfg = get_config(cfg_path)
    dataset = get_dataset(cfg.valset, cfg.transform_val)
    dataloader = get_dataloader(dataset, cfg.valloader)
    model = get_model(cfg)
    if device is None:
        device = torch.device(cfg.load_device)
    # TODO: check the order of the following two statements.
    load_checkpoint(model, cfg.load_from, device)
    model = model.to(device)
    # Start validation.
    buffer = {'acc': []}
    n_correct = 0
    model.eval()
    for c_iter, data_batch in enumerate(dataloader):
        with torch.no_grad():  # disable autograd; forward pass only
            img = to_device(data_batch['img'], device)
            label = to_device(data_batch['gt_labels'], device)
            y_pred = model(img)
            label = torch.cat(label, dim=0)
            acc1 = accuracy(y_pred, label, topk=1)
            buffer['acc'].append(acc1)
            # Accumulate the overall accuracy.
            n_correct += buffer['acc'][-1] * len(data_batch['gt_labels'])
    vis_loss_acc(buffer, title='eval dataset')
    print('ACC on dataset: %.3f' % (n_correct / len(dataset)))
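# The accuracy() helper used above is defined elsewhere in the repo. A minimal
# top-1 sketch under assumed semantics (y_pred: (N, C) logits, label: (N,)
# class indices, returns a float in [0, 1]); the name is illustrative:
def accuracy_top1_sketch(y_pred, label):
    """Hypothetical stand-in for accuracy(y_pred, label, topk=1)."""
    pred_cls = y_pred.argmax(dim=1)                    # (N,) predicted class ids
    return (pred_cls == label).float().mean().item()   # fraction of correct predictions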
def vis_dataset_bbox_area(cfg_path):
    """Collect the areas of all bboxes in a dataset, and cluster the bbox
    w, h values with k-means.
    """
    from utils.prepare_training import get_config, get_dataset
    cfg = get_config(cfg_path)
    cfg.trainset.params.ann_file = [cfg.trainset.params.ann_file[0]]  # use only voc07 for now
    trainset = get_dataset(cfg.trainset, cfg.transform)
    class_names = trainset.CLASSES
    ws = []
    hs = []
    areas = []
    labels = []
    for data in tqdm(trainset):
        img_meta = data['img_meta']
        gt_labels = data['gt_labels']
        gt_bboxes = data['gt_bboxes']
        w = gt_bboxes[:, 2] - gt_bboxes[:, 0]
        h = gt_bboxes[:, 3] - gt_bboxes[:, 1]
        area = w * h
        ws.extend(w)
        hs.extend(h)
        areas.extend(area)
        labels.extend(gt_labels)
    ws = np.array([w.item() for w in ws])                   # (k,)
    hs = np.array([h.item() for h in hs])                   # (k,)
    areas = np.array([area.item() for area in areas])       # (k,)
    labels = np.array([label.item() for label in labels])   # (k,)
    # First draw the overall area distribution.
    plt.figure()
    plt.title('all')
    plt.hist(areas, 30, range=(0, 90000))
    plt.show()
    # Then draw a histogram per class.
    plt.figure()
    for class_id in range(1, 21):  # assumes 20 classes
        inds = labels == class_id
        class_areas = areas[inds]
        plt.subplot(4, 5, class_id)
        plt.title(class_names[class_id - 1])
        plt.hist(class_areas, 30, range=(0, 90000))
    plt.show()
    # Then scatter w against h.
    plt.figure()
    plt.title('w and h scatter')
    plt.scatter(ws, hs)
    # Then cluster the (w, h) sizes, with w on the x axis and h on the y axis.
    data = np.concatenate([ws[:, None], hs[:, None]], axis=1)
    centers = kmean(data, k=5)
    plt.scatter(centers[:, 0], centers[:, 1], s=50, c='r')
    plt.show()
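# kmean() above is a repo helper that is not shown here. A minimal NumPy
# k-means sketch under assumed semantics (data: (n, 2) array of (w, h) pairs,
# returns (k, 2) cluster centers), in the spirit of anchor-shape clustering:
import numpy as np

def kmean_sketch(data, k=5, n_iter=100, seed=0):
    """Hypothetical stand-in for kmean(data, k)."""
    rng = np.random.RandomState(seed)
    # initialize centers from k random samples
    centers = data[rng.choice(len(data), k, replace=False)].astype(np.float64)
    for _ in range(n_iter):
        # assign each point to its nearest center (euclidean distance)
        dists = np.linalg.norm(data[:, None, :] - centers[None, :, :], axis=2)
        assign = dists.argmin(axis=1)
        # move each center to the mean of its assigned points
        for i in range(k):
            if (assign == i).any():
                centers[i] = data[assign == i].mean(axis=0)
    return centers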
def train(cfg_path):
    """Training demo."""
    # Get the config.
    cfg = get_config(cfg_path)
    # Create the logger.
    logger = get_logger(cfg.log_level)
    logger.info("start training:")
    # Create the model.
    model = OneStageDetector(cfg)
    model.to(cfg.device)
    # Create the data.
    dataset = get_dataset(cfg.data.train)
    dataloader = DataLoader(
        dataset,
        batch_size=cfg.batch_size,
        sampler=cfg.sampler,
        num_workers=cfg.num_workers,
        collate_fn=partial(collate, samples_per_gpu=cfg.data.imgs_per_gpu),
        pin_memory=False)
    # Create the runner and start training.
    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)
    runner.register_hooks()
    runner.run(dataloader)
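# batch_processor above is passed into the runner but not defined in this
# demo. A minimal sketch, assuming the mmcv-style convention of
# batch_processor(model, data, train_mode) returning a dict with the loss
# (the helper name and the assumption that data['img'] is a tensor are mine):
def batch_processor_sketch(model, data, train_mode=True):
    losses = model(**data)  # assume the model returns a dict of loss terms
    loss = sum(v.mean() for v in losses.values())
    return dict(loss=loss,
                log_vars={k: v.mean().item() for k, v in losses.items()},
                num_samples=data['img'].size(0))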
def eval_dataset_det(cfg_path,
                     load_from=None,
                     load_device=None,
                     resume_from=None,
                     result_file=None):
    """Eval a dataset for detection. A few extra parameters are exposed for
    convenience, so the cfg settings don't have to be edited for every run.
    """
    # Prepare the objects needed for validation.
    cfg = get_config(cfg_path)
    cfg.valloader.params.batch_size = 1  # force batch_size=1 for validation
    # Override cfg settings from the function args, so the cfg file itself
    # does not need to be edited for each eval.
    if load_from is not None:
        cfg.load_from = load_from
    if load_device is not None:
        cfg.load_device = load_device
    if resume_from is not None:
        cfg.resume_from = resume_from
    dataset = get_dataset(cfg.valset, cfg.transform_val)
    dataloader = get_dataloader(dataset, cfg.valloader, len(cfg.gpus))
    model = get_model(cfg)
    device = torch.device(cfg.load_device)
    load_checkpoint(model, cfg.load_from, device)
    model = model.to(device)
    # If no saved predictions exist yet, run inference.
    if result_file is None:
        model.eval()
        all_bbox_cls = []
        for c_iter, data_batch in enumerate(dataloader):
            with torch.no_grad():  # disable autograd; forward pass only
                bbox_det = batch_detector(
                    model, data_batch, device,
                    return_loss=False)['bboxes']  # extract bboxes only: (n_cls,)(m,5)
            # Show progress.
            if c_iter % 100 == 0:
                print('%d / %d finished predict.' % (c_iter, len(dataset)))
            all_bbox_cls.append(bbox_det)  # (n_img,)(n_class,)(k,5)
        # Save predictions to file.
        filename = get_time_str() + '_eval_result.pkl'
        save2pkl(all_bbox_cls, cfg.work_dir + filename)
    # Otherwise load the existing result file.
    else:
        all_bbox_cls = loadvar(result_file)
    # Evaluate.
    voc_eval(all_bbox_cls, dataset, iou_thr=0.5)
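# voc_eval() matches predictions to ground truth at iou_thr=0.5. A minimal
# pairwise-IoU sketch for (N, 4) / (M, 4) boxes in (x1, y1, x2, y2) format
# (the function name is illustrative, not the repo's own helper):
import numpy as np

def bbox_iou_sketch(boxes1, boxes2):
    """Pairwise IoU; returns an (N, M) matrix."""
    x1 = np.maximum(boxes1[:, None, 0], boxes2[None, :, 0])
    y1 = np.maximum(boxes1[:, None, 1], boxes2[None, :, 1])
    x2 = np.minimum(boxes1[:, None, 2], boxes2[None, :, 2])
    y2 = np.minimum(boxes1[:, None, 3], boxes2[None, :, 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    return inter / (area1[:, None] + area2[None, :] - inter)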
def load_dataset(cfg_path):
    """Load the dataset after transforms and return the bbox (w, h) sizes
    actually seen in training, as an (m, 2) array.
    """
    cfg = get_config(cfg_path)
    dataset = get_dataset(cfg.trainset, cfg.transform)
    ww = []
    hh = []
    # data = dataset[0]  # for debug
    for data in tqdm(dataset):
        bbox = data['gt_bboxes']
        w = (bbox[:, 2] - bbox[:, 0]).numpy()
        h = (bbox[:, 3] - bbox[:, 1]).numpy()
        ww.append(w)
        hh.append(h)
    ww = np.concatenate(ww, axis=0)  # (m,)
    hh = np.concatenate(hh, axis=0)
    bboxes = np.concatenate([ww[:, None], hh[:, None]], axis=1)
    return bboxes  # (m, 2)
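# A usage sketch for the (m, 2) result: summarize the bbox sizes actually fed
# to the model (the cfg path below is illustrative, not a real file):
#   bboxes = load_dataset('cfg_detector.py')       # (m, 2) of (w, h)
#   print('mean w/h:', bboxes.mean(axis=0))
#   print('median w/h:', np.median(bboxes, axis=0))
#   centers = kmean_sketch(bboxes, k=5)            # candidate anchor shapes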
""" from utils.prepare_training import get_config, get_dataset import sys, os path = os.path.abspath('.') if not path in sys.path: sys.path.insert(0, path) """显示区别 常规数据集出来的图片都是hwc,bgr格式 1. plt.imshow(), 支持hwc, rgb 2. cv2.imshow(), 支持hwc, bgr """ cfg_path = '/home/ubuntu/suliang_git/deep_learning_algorithm/demo/retinface_widerface/cfg_detector_retinaface_widerface.py' cfg = get_config(cfg_path) trainset = get_dataset(cfg.trainset, cfg.transform) tmp1 = trainset[11291] # tested id91(多人脸), 911(单人脸), 191(里边有-1), 9371(有一张侧脸) img = tmp1['img'] label = tmp1['gt_labels'] bbox = tmp1['gt_bboxes'] ldmk = tmp1['gt_landmarks'] from utils.transform import transform_inv label = label # 恢复从0为起点,从而跟CLASS匹配 transform_inv(img, bbox, label, ldmk, mean=cfg.transform.img_params.mean, std=cfg.transform.img_params.std, class_names=None,
def __init__(self, cfg, resume_from=None):
    # Shared variables: must be declared before resume/load, otherwise they
    # would overwrite what resume restores.
    self.c_epoch = 0
    self.c_iter = 0
    self.weight_ready = False
    self.buffer = {'loss': [], 'acc1': [], 'acc5': [], 'lr': []}
    # Get the config.
    self.cfg = cfg
    if resume_from is not None:
        self.cfg.resume_from = resume_from  # let the runner set resume_from directly, avoiding cfg edits
    # Check that dirs and files are valid.
    self.check_dir_file(self.cfg)
    # Set up the logger.
    self.logger = get_logger(self.cfg.logger)
    self.logger.info('start logging info.')
    # Set up the device: under distributed training, different local ranks
    # (different process ids) get different devices.
    self.device = get_device(self.cfg, self.logger)
    # Create the batch processor.
    self.batch_processor = get_batch_processor(self.cfg)
    # Create the datasets.
    self.trainset = get_dataset(self.cfg.trainset, self.cfg.transform)
    self.valset = get_dataset(
        self.cfg.valset,
        self.cfg.transform_val)  # validation uses only basic transforms, no augmentation
    # tmp1 = self.trainset[91]  # for debug: inspect dataset __getitem__
    # img = tmp1['img']
    # label = tmp1['gt_labels']
    # bbox = tmp1['gt_bboxes']
    # ldmk = tmp1['gt_landmarks']
    # from utils.transform import transform_inv
    # class_names = self.trainset.CLASSES
    # label = label - 1  # restore to 0-based so it matches CLASSES
    # transform_inv(img, bbox, label, ldmk, mean=self.cfg.transform.img_params.mean,
    #               std=self.cfg.transform.img_params.std, class_names=class_names, show=True)
    # Create the data loaders.
    self.dataloader = get_dataloader(self.trainset,
                                     self.cfg.trainloader,
                                     len(self.cfg.gpus),
                                     dist=self.cfg.distribute)
    self.valloader = get_dataloader(self.valset,
                                    self.cfg.valloader,
                                    len(self.cfg.gpus),
                                    dist=self.cfg.distribute)
    # tmp2 = next(iter(self.dataloader))  # for debug: set num_workers=0 to step into collate_fn
    # Create the model and initialize it.
    if self.cfg.load_from is not None or self.cfg.resume_from is not None:
        self.cfg.backbone.params.pretrained = None  # skip pretrained weights when load_from/resume_from is set
    self.model = get_model(self.cfg)
    # Optimizer: must be created before the model is sent to cuda.
    self.optimizer = get_optimizer(self.cfg.optimizer, self.model)
    # Learning-rate scheduler.
    self.lr_processor = get_lr_processor(self, self.cfg.lr_processor)
    # Send to GPU.
    # Wrap the parallel model only after the optimizer has extracted the
    # parameters; the parallel wrapper adds a module shell on top of the
    # model, which could otherwise break parameter extraction.
    self.model = get_model_wrapper(self.model, self.cfg)
    self.model.to(self.device)
    # Note: resume/load sends weights straight to the target device, so it
    # must happen after the model is on the device to keep devices matched.
    if self.cfg.resume_from:
        # Load weights plus training state; continue from where training stopped.
        self.resume_training(checkpoint_path=self.cfg.resume_from,
                             map_location=self.device)  # reuse the configured device
    elif self.cfg.load_from:
        # Load weights only (no training state), typically for inference.
        load_device = torch.device(self.cfg.load_device)
        self._load_checkpoint(checkpoint_path=self.cfg.load_from,
                              map_location=load_device)
        self.weight_ready = True
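# resume_training() is defined elsewhere in the runner. A minimal sketch of
# the usual contract, assuming the checkpoint dict stores model/optimizer
# state plus epoch/iter counters (the key names here are assumptions):
def resume_training_sketch(self, checkpoint_path, map_location):
    checkpoint = torch.load(checkpoint_path, map_location=map_location)
    self.model.load_state_dict(checkpoint['state_dict'])      # model weights
    self.optimizer.load_state_dict(checkpoint['optimizer'])   # optimizer state
    self.c_epoch = checkpoint['meta']['epoch']  # continue from the saved epoch
    self.c_iter = checkpoint['meta']['iter']
    self.weight_ready = True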