def train():
    cfg = get_args(**Cfg)
    os.chdir(cfg.root_dir)
    os.makedirs(cfg.log_dir, exist_ok=True)
    os.makedirs(cfg.ckpt_dir, exist_ok=True)
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    torch.manual_seed(317)
    torch.backends.cudnn.benchmark = True
    cfg.device = torch.device('cuda')

    print('Setting up data...')
    train_dataset = KneeDataset(cfg.train_label, cfg)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=cfg.batch_size, shuffle=not cfg.dist,
        num_workers=cfg.num_workers, pin_memory=True, drop_last=True)
    val_dataset = KneeDataset(cfg.val_label, cfg, False)
    eval_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=cfg.batch_size, shuffle=False,
        num_workers=1, pin_memory=True, collate_fn=collate_fn)

    center_net = CenterNet(cfg)
    center_net.train(len(train_dataset), train_loader, eval_loader, cfg.num_epochs)
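# The eval loader above passes a module-level collate_fn whose definition is
# not shown here. A minimal sketch of the usual pattern for these eval
# pipelines is an identity collate that hands per-image samples through
# untouched (an assumption, not necessarily this repo's implementation):
def collate_fn(batch):
    # keep variable-sized eval samples as a plain list instead of stacking
    # them into tensors, so each image keeps its own metadata
    return batch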
def main():
    saver = create_saver(cfg.local_rank, save_dir=cfg.ckpt_dir)
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    torch.manual_seed(317)
    torch.backends.cudnn.benchmark = True  # disable this if OOM at beginning of training

    num_gpus = torch.cuda.device_count()
    if cfg.dist:
        cfg.device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        dist.init_process_group(backend='nccl', init_method='env://',
                                world_size=num_gpus, rank=cfg.local_rank)
    else:
        cfg.device = torch.device('cuda')

    print('Setting up data...')
    Dataset = COCO if cfg.dataset == 'coco' else PascalVOC
    train_dataset = Dataset(cfg.data_dir, 'train', split_ratio=cfg.split_ratio,
                            img_size=cfg.img_size)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        shuffle=not cfg.dist, num_workers=cfg.num_workers,
        pin_memory=True, drop_last=True,
        sampler=train_sampler if cfg.dist else None)

    Dataset_eval = COCO_eval if cfg.dataset == 'coco' else PascalVOC_eval
    val_dataset = Dataset_eval(cfg.data_dir, 'val', test_scales=[1.], test_flip=False)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1,
                                             shuffle=False, num_workers=1, pin_memory=True,
                                             collate_fn=val_dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]),
                             num_classes=train_dataset.num_classes)
    else:
        raise NotImplementedError

    if cfg.dist:
        # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = model.to(cfg.device)
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[cfg.local_rank, ],
                                                    output_device=cfg.local_rank)
    else:
        # TODO: don't use this, or wrap it with utils.losses.Loss()!
        model = nn.DataParallel(model).to(cfg.device)

    optimizer = torch.optim.Adam(model.parameters(), cfg.lr)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, cfg.lr_step, gamma=0.1)

    def train(epoch):
        print('\n%s Epoch: %d' % (datetime.now(), epoch))
        model.train()
        tic = time.perf_counter()
        for batch_idx, batch in enumerate(train_loader):
            for k in batch:
                batch[k] = batch[k].to(device=cfg.device, non_blocking=True)

            outputs = model(batch['image'])
            hmap_tl, hmap_br, embd_tl, embd_br, regs_tl, regs_br = zip(*outputs)

            embd_tl = [_tranpose_and_gather_feature(e, batch['inds_tl']) for e in embd_tl]
            embd_br = [_tranpose_and_gather_feature(e, batch['inds_br']) for e in embd_br]
            regs_tl = [_tranpose_and_gather_feature(r, batch['inds_tl']) for r in regs_tl]
            regs_br = [_tranpose_and_gather_feature(r, batch['inds_br']) for r in regs_br]

            focal_loss = _neg_loss(hmap_tl, batch['hmap_tl']) + \
                         _neg_loss(hmap_br, batch['hmap_br'])
            reg_loss = _reg_loss(regs_tl, batch['regs_tl'], batch['ind_masks']) + \
                       _reg_loss(regs_br, batch['regs_br'], batch['ind_masks'])
            pull_loss, push_loss = _ae_loss(embd_tl, embd_br, batch['ind_masks'])
            loss = focal_loss + 0.1 * pull_loss + 0.1 * push_loss + reg_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % cfg.log_interval == 0:
                duration = time.perf_counter() - tic
                tic = time.perf_counter()
                print('[%d/%d-%d/%d] ' % (epoch, cfg.num_epochs, batch_idx, len(train_loader)) +
                      ' focal_loss= %.5f pull_loss= %.5f push_loss= %.5f reg_loss= %.5f' %
                      (focal_loss.item(), pull_loss.item(), push_loss.item(), reg_loss.item()) +
                      ' (%d samples/sec)' % (cfg.batch_size * cfg.log_interval / duration))

                step = len(train_loader) * epoch + batch_idx
                summary_writer.add_scalar('focal_loss', focal_loss.item(), step)
                summary_writer.add_scalar('pull_loss', pull_loss.item(), step)
                summary_writer.add_scalar('push_loss', push_loss.item(), step)
                summary_writer.add_scalar('reg_loss', reg_loss.item(), step)
        return

    def val_map(epoch):
        print('\n%s Val@Epoch: %d' % (datetime.now(), epoch))
        model.eval()
        # torch.cuda.empty_cache()

        results = {}
        with torch.no_grad():
            for inputs in val_loader:
                img_id, inputs = inputs[0]
                detections = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)
                    output = model(inputs[scale]['image'])[-1]
                    det = _decode(*output, ae_threshold=0.5, K=100, kernel=3)
                    det = det.reshape(det.shape[0], -1, 8).detach().cpu().numpy()
                    if det.shape[0] == 2:
                        det[1, :, [0, 2]] = inputs[scale]['fmap_size'][0, 1] - det[1, :, [2, 0]]
                    det = det.reshape(1, -1, 8)

                    _rescale_dets(det, inputs[scale]['ratio'],
                                  inputs[scale]['border'], inputs[scale]['size'])
                    det[:, :, 0:4] /= scale
                    detections.append(det)

                detections = np.concatenate(detections, axis=1)[0]
                # reject detections with negative scores
                detections = detections[detections[:, 4] > -1]
                classes = detections[..., -1]

                results[img_id] = {}
                for j in range(val_dataset.num_classes):
                    keep_inds = (classes == j)
                    results[img_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
                    soft_nms_merge(results[img_id][j + 1], Nt=0.5, method=2, weight_exp=10)
                    # soft_nms(results[img_id][j + 1], Nt=0.5, method=2)
                    results[img_id][j + 1] = results[img_id][j + 1][:, 0:5]

                scores = np.hstack([results[img_id][j][:, -1]
                                    for j in range(1, val_dataset.num_classes + 1)])
                if len(scores) > val_dataset.max_objs:
                    kth = len(scores) - val_dataset.max_objs
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, val_dataset.num_classes + 1):
                        keep_inds = (results[img_id][j][:, -1] >= thresh)
                        results[img_id][j] = results[img_id][j][keep_inds]

        eval_results = val_dataset.run_eval(results, save_dir=cfg.ckpt_dir)
        print(eval_results)
        summary_writer.add_scalar('val_mAP/mAP', eval_results[0], epoch)

    print('Starting training...')
    for epoch in range(1, cfg.num_epochs + 1):
        train_sampler.set_epoch(epoch)
        train(epoch)
        if cfg.val_interval > 0 and epoch % cfg.val_interval == 0:
            val_map(epoch)
        print(saver.save(model.module.state_dict(), 'checkpoint'))
        lr_scheduler.step(epoch)  # move to here after pytorch1.1.0

    summary_writer.close()
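# _tranpose_and_gather_feature (the repo keeps the original, misspelled name)
# picks, for each ground-truth object index, the feature vector at that
# spatial location. A sketch of the helper as it typically appears in
# CenterNet/CornerNet-family code, shown for reference only; this repo's
# version may differ in details:
def _gather_feature(feat, ind):
    # feat: [B, H*W, C]; ind: [B, K] flattened spatial indices
    dim = feat.size(2)
    ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
    return feat.gather(1, ind)  # [B, K, C]

def _tranpose_and_gather_feature(feat, ind):
    feat = feat.permute(0, 2, 3, 1).contiguous()      # [B, C, H, W] -> [B, H, W, C]
    feat = feat.view(feat.size(0), -1, feat.size(3))  # [B, H*W, C]
    return _gather_feature(feat, ind)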
def main():
    best_mAP = 0.0
    saver = create_saver(cfg.local_rank, save_dir=cfg.ckpt_dir)
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
    print_log = logger.info
    print_log(cfg)

    torch.manual_seed(319)
    torch.backends.cudnn.benchmark = True  # disable this if OOM at beginning of training

    num_gpus = torch.cuda.device_count()
    if cfg.dist:
        cfg.device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        dist.init_process_group(backend='nccl', init_method='env://',
                                world_size=num_gpus, rank=cfg.local_rank)
    else:
        cfg.device = torch.device('cuda:%d' % cfg.device_id)

    print_log('Setting up data...')
    cfg.dictionary_file = os.path.join(
        cfg.dictionary_folder,
        'train_dict_v{}_n{}_a{:.2f}.npy'.format(cfg.n_vertices, cfg.n_codes, cfg.sparse_alpha))
    print_log('Loading the dictionary: ' + cfg.dictionary_file)
    dictionary = np.load(cfg.dictionary_file)

    cfg.padding = 127 if 'hourglass' in cfg.arch else 31

    Dataset = COCOSEGMCMM if cfg.dataset == 'coco' else KINSSEGMCMM
    train_dataset = Dataset(cfg.data_dir, cfg.dictionary_file, 'train',
                            split_ratio=cfg.split_ratio, img_size=cfg.img_size,
                            padding=cfg.padding, n_coeffs=cfg.n_codes,
                            n_vertices=cfg.n_vertices, sparse_alpha=cfg.sparse_alpha)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        shuffle=not cfg.dist, num_workers=cfg.num_workers,
        pin_memory=False, drop_last=True,
        sampler=train_sampler if cfg.dist else None)

    Dataset_eval = COCO_eval_segm_cmm if cfg.dataset == 'coco' else KINS_eval_segm_cmm
    val_dataset = Dataset_eval(cfg.data_dir, cfg.dictionary_file, 'val',
                               test_scales=[1.], test_flip=False, img_size=cfg.img_size,
                               padding=cfg.padding, n_coeffs=cfg.n_codes,
                               n_vertices=cfg.n_vertices, fix_size=False,
                               sparse_alpha=cfg.sparse_alpha)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1,
                                             shuffle=False, num_workers=1, pin_memory=False,
                                             collate_fn=val_dataset.collate_fn)

    print_log('Creating model...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5, nstack=2, dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4], n_codes=cfg.n_codes)
    elif 'resdcn' in cfg.arch:
        model = get_pose_resdcn(num_layers=int(cfg.arch.split('_')[-1]), head_conv=64,
                                num_classes=train_dataset.num_classes, num_codes=cfg.n_codes)
    else:
        raise NotImplementedError

    if cfg.dist:
        # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = model.to(cfg.device)
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[cfg.local_rank, ],
                                                    output_device=cfg.local_rank)
    else:
        model = nn.DataParallel(model, device_ids=[cfg.local_rank, ]).to(cfg.device)

    if cfg.pretrain_checkpoint is not None and os.path.isfile(cfg.pretrain_checkpoint):
        print_log('Load pretrain model from ' + cfg.pretrain_checkpoint)
        model = load_model(model, cfg.pretrain_checkpoint, cfg.device_id)
        torch.cuda.empty_cache()

    optimizer = torch.optim.Adam(model.parameters(), cfg.lr)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, cfg.lr_step, gamma=cfg.gamma)

    def train(epoch):
        print_log('\n Epoch: %d' % epoch)
        model.train()
        # torch.autograd.set_detect_anomaly(mode=True)
        tic = time.perf_counter()
        for batch_idx, batch in enumerate(train_loader):
            for k in batch:
                if k != 'meta':
                    batch[k] = batch[k].to(device=cfg.device, non_blocking=True)

            dict_tensor = torch.from_numpy(dictionary.astype(np.float32)).to(
                cfg.device, non_blocking=True)
            dict_tensor.requires_grad = False

            outputs = model(batch['image'])
            # hmap, regs, w_h_, codes_1, codes_2, codes_3, offsets = zip(*outputs)
            hmap, regs, w_h_, codes, offsets = zip(*outputs)

            regs = [_tranpose_and_gather_feature(r, batch['inds']) for r in regs]
            w_h_ = [_tranpose_and_gather_feature(r, batch['inds']) for r in w_h_]
            codes = [_tranpose_and_gather_feature(r, batch['inds']) for r in codes]
            # c_2 = [_tranpose_and_gather_feature(r, batch['inds']) for r in codes_2]
            # c_3 = [_tranpose_and_gather_feature(r, batch['inds']) for r in codes_3]
            offsets = [_tranpose_and_gather_feature(r, batch['inds']) for r in offsets]

            hmap_loss = _neg_loss(hmap, batch['hmap'])
            reg_loss = _reg_loss(regs, batch['regs'], batch['ind_masks'])
            w_h_loss = _reg_loss(w_h_, batch['w_h_'], batch['ind_masks'])
            offsets_loss = _reg_loss(offsets, batch['offsets'], batch['ind_masks'])
            # codes_loss = (norm_reg_loss(c_1, batch['codes'], batch['ind_masks'], sparsity=0.)
            #               + norm_reg_loss(c_2, batch['codes'], batch['ind_masks'], sparsity=0.)
            #               + norm_reg_loss(c_3, batch['codes'], batch['ind_masks'], sparsity=0.)) / 3.

            if cfg.code_loss == 'norm':
                codes_loss = norm_reg_loss(codes, batch['codes'], batch['ind_masks'], sparsity=0.)
            elif cfg.code_loss == 'adapt':
                codes_loss = adapt_norm_reg_loss(codes, batch['codes'], batch['ind_masks'],
                                                 sparsity=0., norm=cfg.adapt_norm)
            elif cfg.code_loss == 'wing':
                codes_loss = wing_norm_reg_loss(codes, batch['codes'], batch['ind_masks'],
                                                sparsity=0., epsilon=cfg.wing_epsilon,
                                                omega=cfg.wing_omega)
            else:
                print_log('Loss type for code not implemented yet.')
                raise NotImplementedError

            loss = 1. * hmap_loss + 1. * reg_loss + 0.1 * w_h_loss + 0.1 * offsets_loss + \
                   cfg.code_loss_weight * codes_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % cfg.log_interval == 0:
                duration = time.perf_counter() - tic
                tic = time.perf_counter()
                print_log('[%d/%d-%d/%d] ' % (epoch, cfg.num_epochs, batch_idx, len(train_loader)) +
                          'Loss: hmap = %.3f reg = %.3f w_h = %.3f code = %.3f offsets = %.3f' %
                          (hmap_loss.item(), reg_loss.item(), w_h_loss.item(),
                           codes_loss.item(), offsets_loss.item()) +
                          ' (%d samples/sec)' % (cfg.batch_size * cfg.log_interval / duration))

                step = len(train_loader) * epoch + batch_idx
                summary_writer.add_scalar('hmap_loss', hmap_loss.item(), step)
                summary_writer.add_scalar('reg_loss', reg_loss.item(), step)
                summary_writer.add_scalar('w_h_loss', w_h_loss.item(), step)
                summary_writer.add_scalar('offset_loss', offsets_loss.item(), step)
                summary_writer.add_scalar('code_loss', codes_loss.item(), step)
        return

    def val_map(epoch):
        print_log('\n Val@Epoch: %d' % epoch)
        model.eval()
        torch.cuda.empty_cache()
        max_per_image = 100

        results = {}
        speed_list = []
        with torch.no_grad():
            for inputs in val_loader:
                img_id, inputs = inputs[0]
                start_image_time = time.time()
                segmentations = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)
                    # dict_tensor = torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device, non_blocking=True)
                    # dict_tensor.requires_grad = False
                    # hmap, regs, w_h_, _, _, codes, offsets = model(inputs[scale]['image'])[-1]
                    hmap, regs, w_h_, codes, offsets = model(inputs[scale]['image'])[-1]
                    output = [hmap, regs, w_h_, codes, offsets]

                    segms = ctsegm_inmodal_code_decode(
                        *output,
                        torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device),
                        K=cfg.test_topk)
                    segms = segms.detach().cpu().numpy().reshape(1, -1, segms.shape[2])[0]

                    top_preds = {}
                    for j in range(cfg.n_vertices):
                        segms[:, 2 * j:2 * j + 2] = transform_preds(
                            segms[:, 2 * j:2 * j + 2],
                            inputs[scale]['center'], inputs[scale]['scale'],
                            (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    segms[:, cfg.n_vertices * 2:cfg.n_vertices * 2 + 2] = transform_preds(
                        segms[:, cfg.n_vertices * 2:cfg.n_vertices * 2 + 2],
                        inputs[scale]['center'], inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    segms[:, cfg.n_vertices * 2 + 2:cfg.n_vertices * 2 + 4] = transform_preds(
                        segms[:, cfg.n_vertices * 2 + 2:cfg.n_vertices * 2 + 4],
                        inputs[scale]['center'], inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))

                    clses = segms[:, -1]
                    for j in range(val_dataset.num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = segms[inds, :cfg.n_vertices * 2 + 5].astype(np.float32)
                        top_preds[j + 1][:, :cfg.n_vertices * 2 + 4] /= scale

                    segmentations.append(top_preds)
                end_image_time = time.time()

                segms_and_scores = {j: np.concatenate([d[j] for d in segmentations], axis=0)
                                    for j in range(1, val_dataset.num_classes + 1)}
                scores = np.hstack([segms_and_scores[j][:, cfg.n_vertices * 2 + 4]
                                    for j in range(1, val_dataset.num_classes + 1)])
                if len(scores) > max_per_image:
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, val_dataset.num_classes + 1):
                        keep_inds = (segms_and_scores[j][:, cfg.n_vertices * 2 + 4] >= thresh)
                        segms_and_scores[j] = segms_and_scores[j][keep_inds]

                results[img_id] = segms_and_scores
                speed_list.append(end_image_time - start_image_time)

        eval_results = val_dataset.run_eval(results, save_dir=cfg.ckpt_dir)
        print_log(eval_results)
        summary_writer.add_scalar('val_mAP/mAP', eval_results[0], epoch)
        print_log('Average speed on val set:{:.2f}'.format(1. / np.mean(speed_list)))
        return eval_results[0]

    print_log('Starting training...')
    for epoch in range(1, cfg.num_epochs + 1):
        start = time.time()
        train_sampler.set_epoch(epoch)
        train(epoch)
        if (cfg.val_interval > 0 and epoch % cfg.val_interval == 0) or epoch == 2:
            stat = val_map(epoch)
            if stat > best_mAP:
                print_log('Overall mAP {:.3f} is improving ...'.format(stat))
                print_log(saver.save(model.module.state_dict(), 'checkpoint'))
                best_mAP = stat
        lr_scheduler.step()  # move to here after pytorch1.1.0
        epoch_time = (time.time() - start) / 3600. / 24.
        print_log('ETA:{:.2f} Days'.format((cfg.num_epochs - epoch) * epoch_time))

    summary_writer.close()
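# The 'wing' branch above penalizes code-regression residuals with a
# wing-style curve: log-shaped near zero, linear in the tails (Feng et al.,
# "Wing Loss for Robust Facial Landmark Localisation"). A sketch of the
# elementwise penalty that wing_norm_reg_loss presumably builds on, with
# epsilon/omega matching cfg.wing_epsilon/cfg.wing_omega; the repo's masking
# and normalization are omitted here and its exact form may differ:
import math

def wing_penalty(residual, omega, epsilon):
    abs_r = residual.abs()
    # constant C makes the two pieces meet at |r| == omega
    C = omega - omega * math.log(1 + omega / epsilon)
    return torch.where(abs_r < omega,
                       omega * torch.log(1 + abs_r / epsilon),
                       abs_r - C)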
def main():
    saver = create_saver(cfg.local_rank, save_dir=cfg.ckpt_dir)
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    torch.manual_seed(319)
    torch.backends.cudnn.benchmark = True  # disable this if OOM at beginning of training

    num_gpus = torch.cuda.device_count()
    if cfg.dist:
        cfg.device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        dist.init_process_group(backend='nccl', init_method='env://',
                                world_size=num_gpus, rank=cfg.local_rank)
    else:
        cfg.device = torch.device('cuda:%d' % cfg.device_id)

    print('Setting up data...')
    dictionary = np.load(cfg.dictionary_file)
    Dataset = COCOSEGMSHIFT if cfg.dataset == 'coco' else PascalVOC
    train_dataset = Dataset(cfg.data_dir, cfg.dictionary_file, 'train',
                            split_ratio=cfg.split_ratio, img_size=cfg.img_size)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        shuffle=not cfg.dist, num_workers=cfg.num_workers,
        pin_memory=False, drop_last=True,
        sampler=train_sampler if cfg.dist else None)

    Dataset_eval = COCO_eval_segm_shift if cfg.dataset == 'coco' else PascalVOC_eval
    val_dataset = Dataset_eval(cfg.data_dir, cfg.dictionary_file, 'val',
                               test_scales=[1.], test_flip=False)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1,
                                             shuffle=False, num_workers=1, pin_memory=False,
                                             collate_fn=val_dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]),
                             num_classes=train_dataset.num_classes)
    else:
        raise NotImplementedError

    if cfg.dist:
        # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = model.to(cfg.device)
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[cfg.local_rank, ],
                                                    output_device=cfg.local_rank)
    else:
        model = nn.DataParallel(model, device_ids=[cfg.local_rank, ]).to(cfg.device)

    if cfg.pretrain_checkpoint is not None and os.path.isfile(cfg.pretrain_checkpoint):
        print('Load pretrain model from ' + cfg.pretrain_checkpoint)
        model = load_model(model, cfg.pretrain_checkpoint, cfg.device_id)
        torch.cuda.empty_cache()

    optimizer = torch.optim.Adam(model.parameters(), cfg.lr)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, cfg.lr_step, gamma=0.1)

    def train(epoch):
        print('\n Epoch: %d' % epoch)
        model.train()
        tic = time.perf_counter()
        for batch_idx, batch in enumerate(train_loader):
            for k in batch:
                if k != 'meta':
                    batch[k] = batch[k].to(device=cfg.device, non_blocking=True)

            outputs = model(batch['image'])
            hmap, regs, w_h_, codes_ = zip(*outputs)

            regs = [_tranpose_and_gather_feature(r, batch['inds']) for r in regs]
            w_h_ = [_tranpose_and_gather_feature(r, batch['inds']) for r in w_h_]
            codes_ = [_tranpose_and_gather_feature(r, batch['inds']) for r in codes_]

            hmap_loss = _neg_loss(hmap, batch['hmap'])
            reg_loss = _reg_loss(regs, batch['regs'], batch['ind_masks'])
            w_h_loss = _reg_loss(w_h_, batch['w_h_'], batch['ind_masks'])
            codes_loss = norm_reg_loss(codes_, batch['codes'], batch['ind_masks'])
            # codes_loss = mse_reg_loss(codes_, batch['codes'], batch['ind_masks'])
            loss = hmap_loss + 1 * reg_loss + 0.1 * w_h_loss + cfg.code_loss_weight * codes_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % cfg.log_interval == 0:
                duration = time.perf_counter() - tic
                tic = time.perf_counter()
                print('[%d/%d-%d/%d] ' % (epoch, cfg.num_epochs, batch_idx, len(train_loader)) +
                      ' hmap_loss= %.3f reg_loss= %.3f w_h_loss= %.3f code_loss= %.3f' %
                      (hmap_loss.item(), reg_loss.item(), w_h_loss.item(), codes_loss.item()) +
                      ' (%d samples/sec)' % (cfg.batch_size * cfg.log_interval / duration))

                step = len(train_loader) * epoch + batch_idx
                summary_writer.add_scalar('hmap_loss', hmap_loss.item(), step)
                summary_writer.add_scalar('reg_loss', reg_loss.item(), step)
                summary_writer.add_scalar('w_h_loss', w_h_loss.item(), step)
                summary_writer.add_scalar('code_loss', codes_loss.item(), step)
        return

    def val_map(epoch):
        print('\n Val@Epoch: %d' % epoch)
        model.eval()
        torch.cuda.empty_cache()
        max_per_image = 100

        results = {}
        input_scales = {}
        speed_list = []
        with torch.no_grad():
            for inputs in val_loader:
                img_id, inputs = inputs[0]
                start_image_time = time.time()
                segmentations = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)
                    if scale == 1. and img_id not in input_scales.keys():
                        # keep track of the input image sizes
                        _, _, input_h, input_w = inputs[scale]['image'].shape
                        input_scales[img_id] = {'h': input_h, 'w': input_w}

                    output = model(inputs[scale]['image'])[-1]
                    segms = ctsegm_shift_decode(
                        *output,
                        torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device),
                        K=cfg.test_topk)
                    segms = segms.detach().cpu().numpy().reshape(1, -1, segms.shape[2])[0]

                    top_preds = {}
                    for j in range(cfg.n_vertices):
                        segms[:, 2 * j:2 * j + 2] = transform_preds(
                            segms[:, 2 * j:2 * j + 2],
                            inputs[scale]['center'], inputs[scale]['scale'],
                            (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    segms[:, cfg.n_vertices * 2:cfg.n_vertices * 2 + 2] = transform_preds(
                        segms[:, cfg.n_vertices * 2:cfg.n_vertices * 2 + 2],
                        inputs[scale]['center'], inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    segms[:, cfg.n_vertices * 2 + 2:cfg.n_vertices * 2 + 4] = transform_preds(
                        segms[:, cfg.n_vertices * 2 + 2:cfg.n_vertices * 2 + 4],
                        inputs[scale]['center'], inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))

                    clses = segms[:, -1]
                    for j in range(val_dataset.num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = segms[inds, :cfg.n_vertices * 2 + 5].astype(np.float32)
                        top_preds[j + 1][:, :cfg.n_vertices * 2 + 4] /= scale

                    segmentations.append(top_preds)
                end_image_time = time.time()

                segms_and_scores = {j: np.concatenate([d[j] for d in segmentations], axis=0)
                                    for j in range(1, val_dataset.num_classes + 1)}
                scores = np.hstack([segms_and_scores[j][:, cfg.n_vertices * 2 + 4]
                                    for j in range(1, val_dataset.num_classes + 1)])
                if len(scores) > max_per_image:
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, val_dataset.num_classes + 1):
                        keep_inds = (segms_and_scores[j][:, cfg.n_vertices * 2 + 4] >= thresh)
                        segms_and_scores[j] = segms_and_scores[j][keep_inds]

                results[img_id] = segms_and_scores
                speed_list.append(end_image_time - start_image_time)

        eval_results = val_dataset.run_eval(results, input_scales, save_dir=cfg.ckpt_dir)
        print(eval_results)
        summary_writer.add_scalar('val_mAP/mAP', eval_results[0], epoch)
        print('Average speed on val set:{:.2f}'.format(1. / np.mean(speed_list)))

    print('Starting training...')
    for epoch in range(1, cfg.num_epochs + 1):
        start = time.time()
        train_sampler.set_epoch(epoch)
        train(epoch)
        if (cfg.val_interval > 0 and epoch % cfg.val_interval == 0) or epoch == 3:
            val_map(epoch)
        print(saver.save(model.module.state_dict(), 'checkpoint'))
        lr_scheduler.step(epoch)  # move to here after pytorch1.1.0
        epoch_time = (time.time() - start) / 3600. / 24.
        print('ETA:{:.2f} Days'.format((cfg.num_epochs - epoch) * epoch_time))

    summary_writer.close()
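# All of the eval loops above cap detections per image with the same
# np.partition trick: to keep the top max_per_image scores, partition so the
# kth-smallest score (kth = len - max_per_image) lands at index kth; every
# score at or above that value survives. A tiny self-contained example:
import numpy as np

scores = np.array([0.9, 0.1, 0.4, 0.8, 0.3])
max_per_image = 3
kth = len(scores) - max_per_image        # 2
thresh = np.partition(scores, kth)[kth]  # 0.4, the 3rd-smallest score
keep = scores >= thresh                  # keeps 0.9, 0.4, 0.8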
def main():
    saver = create_saver(cfg.local_rank, save_dir=cfg.ckpt_dir)
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    torch.manual_seed(317)
    torch.backends.cudnn.benchmark = True  # disable this if OOM at beginning of training

    num_gpus = torch.cuda.device_count()
    if cfg.dist:
        cfg.device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        dist.init_process_group(backend='nccl', init_method='env://',
                                world_size=num_gpus, rank=cfg.local_rank)
    else:
        cfg.device = torch.device('cuda')

    print('Setting up data...')
    Dataset = COCO if cfg.dataset == 'coco' else PascalVOC
    train_dataset = Dataset(cfg.data_dir, 'train', split_ratio=cfg.split_ratio,
                            img_size=cfg.img_size)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        shuffle=not cfg.dist, num_workers=cfg.num_workers,
        pin_memory=True, drop_last=True,
        sampler=train_sampler if cfg.dist else None)

    Dataset_eval = COCO_eval if cfg.dataset == 'coco' else PascalVOC_eval
    val_dataset = Dataset_eval(cfg.data_dir, 'test', test_scales=[1.], test_flip=False)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1,
                                             shuffle=False, num_workers=1, pin_memory=True,
                                             collate_fn=val_dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]),
                             num_classes=train_dataset.num_classes)
    else:
        raise NotImplementedError

    if cfg.dist:
        # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = model.to(cfg.device)
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[cfg.local_rank, ],
                                                    output_device=cfg.local_rank)
    else:
        model = nn.DataParallel(model).to(cfg.device)

    if os.path.isfile(cfg.pretrain_dir):
        model = load_model(model, cfg.pretrain_dir)

    optimizer = torch.optim.Adam(model.parameters(), cfg.lr)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, cfg.lr_step, gamma=0.1)

    def train(epoch):
        print('\n Epoch: %d' % epoch)
        model.train()
        tic = time.perf_counter()
        for batch_idx, batch in enumerate(train_loader):
            for k in batch:
                if k != 'meta':
                    batch[k] = batch[k].to(device=cfg.device, non_blocking=True)

            outputs = model(batch['image'])
            # unpack the three head outputs: heatmap, reg and wh
            hmap, regs, w_h_ = zip(*outputs)
            regs = [_tranpose_and_gather_feature(r, batch['inds']) for r in regs]
            w_h_ = [_tranpose_and_gather_feature(r, batch['inds']) for r in w_h_]

            # compute each loss term separately
            hmap_loss = _neg_loss(hmap, batch['hmap'])
            reg_loss = _reg_loss(regs, batch['regs'], batch['ind_masks'])
            w_h_loss = _reg_loss(w_h_, batch['w_h_'], batch['ind_masks'])
            # weight the terms to get the final loss
            loss = hmap_loss + 1 * reg_loss + 0.1 * w_h_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % cfg.log_interval == 0:
                duration = time.perf_counter() - tic
                tic = time.perf_counter()
                print('[%d/%d-%d/%d] ' % (epoch, cfg.num_epochs, batch_idx, len(train_loader)) +
                      ' hmap_loss= %.5f reg_loss= %.5f w_h_loss= %.5f' %
                      (hmap_loss.item(), reg_loss.item(), w_h_loss.item()) +
                      ' (%d samples/sec)' % (cfg.batch_size * cfg.log_interval / duration))

                step = len(train_loader) * epoch + batch_idx
                summary_writer.add_scalar('hmap_loss', hmap_loss.item(), step)
                summary_writer.add_scalar('reg_loss', reg_loss.item(), step)
                summary_writer.add_scalar('w_h_loss', w_h_loss.item(), step)
        return

    def val_map(epoch):
        print('\n Val@Epoch: %d' % epoch)
        model.eval()
        torch.cuda.empty_cache()
        max_per_image = 100

        results = {}
        with torch.no_grad():
            for inputs in val_loader:
                img_id, inputs = inputs[0]
                detections = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)
                    output = model(inputs[scale]['image'])[-1]
                    dets = ctdet_decode(*output, K=cfg.test_topk)
                    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

                    top_preds = {}
                    dets[:, :2] = transform_preds(
                        dets[:, 0:2], inputs[scale]['center'], inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    dets[:, 2:4] = transform_preds(
                        dets[:, 2:4], inputs[scale]['center'], inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    clses = dets[:, -1]
                    for j in range(val_dataset.num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                        top_preds[j + 1][:, :4] /= scale

                    detections.append(top_preds)

                bbox_and_scores = {j: np.concatenate([d[j] for d in detections], axis=0)
                                   for j in range(1, val_dataset.num_classes + 1)}
                scores = np.hstack([bbox_and_scores[j][:, 4]
                                    for j in range(1, val_dataset.num_classes + 1)])
                if len(scores) > max_per_image:
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, val_dataset.num_classes + 1):
                        keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                        bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

                results[img_id] = bbox_and_scores

        eval_results = val_dataset.run_eval(results, save_dir=cfg.ckpt_dir)
        print(eval_results)
        summary_writer.add_scalar('val_mAP/mAP', eval_results[0], epoch)

    print('Starting training...')
    for epoch in range(1, cfg.num_epochs + 1):
        train_sampler.set_epoch(epoch)
        train(epoch)
        if cfg.val_interval > 0 and epoch % cfg.val_interval == 0:
            val_map(epoch)
        print(saver.save(model.module.state_dict(), 'checkpoint'))
        lr_scheduler.step(epoch)  # move to here after pytorch1.1.0

    summary_writer.close()
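# _neg_loss is the penalty-reduced pixelwise focal loss from CenterNet
# (Zhou et al.): positives sit at Gaussian peaks of the target heatmap and
# negatives near a peak are down-weighted by (1 - target)^4. A sketch of the
# implementation these repos typically share (preds is the tuple of per-stack
# heatmaps after sigmoid); the repo's own version may differ slightly:
def neg_loss_sketch(preds, targets):
    pos_inds = targets.eq(1).float()
    neg_inds = targets.lt(1).float()
    neg_weights = torch.pow(1 - targets, 4)

    loss = 0
    for pred in preds:
        pred = torch.clamp(pred, 1e-6, 1 - 1e-6)  # numerical safety
        pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds
        neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds
        num_pos = pos_inds.sum()
        if num_pos == 0:
            loss = loss - neg_loss.sum()
        else:
            loss = loss - (pos_loss.sum() + neg_loss.sum()) / num_pos
    return loss / len(preds)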
def main():
    logger = create_logger(save_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    torch.manual_seed(317)
    torch.backends.cudnn.benchmark = False
    cfg.device = torch.device('cuda')

    print('Setting up data...')
    Dataset_eval = KAIST_eval
    dataset = Dataset_eval(cfg.data_dir, 'test', test_scales=cfg.test_scales,
                           test_flip=cfg.test_flip)
    val_loader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                             shuffle=False, num_workers=1, pin_memory=True,
                                             collate_fn=dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    else:
        raise NotImplementedError

    model = nn.DataParallel(model).to(cfg.device)
    if os.path.exists(cfg.pretrain_dir):
        model.load_state_dict(torch.load(cfg.pretrain_dir))
        print('loaded pretrained model from %s !' % cfg.pretrain_dir)

    print('test starts at %s' % datetime.now())
    model.eval()
    results = {}
    with torch.no_grad():
        for inputs in val_loader:
            img_id, inputs = inputs[0]
            detections = []
            for scale in inputs:
                inputs[scale]['img_rgb'] = inputs[scale]['img_rgb'].to(cfg.device)
                inputs[scale]['img_ir'] = inputs[scale]['img_ir'].to(cfg.device)

                output = model((inputs[scale]['img_rgb'], inputs[scale]['img_ir']))[-1]
                dets = _decode(*output, ae_threshold=cfg.ae_threshold, K=cfg.topk, kernel=3)
                dets = dets.reshape(dets.shape[0], -1, 8).detach().cpu().numpy()
                if dets.shape[0] == 2:
                    # un-flip the boxes predicted on the horizontally flipped copy
                    dets[1, :, [0, 2]] = inputs[scale]['fmap_size'][0, 1] - dets[1, :, [2, 0]]
                dets = dets.reshape(1, -1, 8)

                _rescale_dets(dets, inputs[scale]['ratio'],
                              inputs[scale]['border'], inputs[scale]['size'])
                dets[:, :, 0:4] /= scale
                detections.append(dets)

            detections = np.concatenate(detections, axis=1)[0]
            # reject detections with negative scores
            detections = detections[detections[:, 4] > -1]
            classes = detections[..., -1]

            results[img_id] = {}
            for j in range(dataset.num_classes):
                keep_inds = (classes == j)
                results[img_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
                bboxes = results[img_id][j + 1]
                bboxes = bboxes[(bboxes[:, 5] != 0.0) & (bboxes[:, 6] != 0.0)]
                print(img_id)
                soft_nms_merge(bboxes, Nt=cfg.nms_threshold, method=2, weight_exp=cfg.w_exp)
                # soft_nms(results[img_id][j + 1], Nt=0.5, method=2)
                results[img_id][j + 1] = results[img_id][j + 1][:, 0:5]

            scores = np.hstack([results[img_id][j][:, -1]
                                for j in range(1, dataset.num_classes + 1)])
            if len(scores) > dataset.max_objs:
                kth = len(scores) - dataset.max_objs
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, dataset.num_classes + 1):
                    keep_inds = (results[img_id][j][:, -1] >= thresh)
                    results[img_id][j] = results[img_id][j][keep_inds]

    lamr = dataset.run_eval(results, run_dir=cfg.ckpt_dir)
    print('log-average miss rate = {}'.format(lamr))
    print('test ends at %s' % datetime.now())
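# The dets.shape[0] == 2 branch above merges flip-test results: row 0 holds
# detections on the original image, row 1 those on the horizontally flipped
# copy, so the flipped x-coordinates are mirrored back with x' = fmap_w - x.
# Note the index order ([0, 2] on the left, [2, 0] on the right), which also
# swaps x1/x2 so that x1' <= x2' still holds. A tiny numeric check:
import numpy as np

fmap_w = 100.
box = np.array([[10., 5., 30., 25.]])     # x1, y1, x2, y2 on the flipped image
box[:, [0, 2]] = fmap_w - box[:, [2, 0]]  # -> [70., 5., 90., 25.]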
def main():
    logger = create_logger(save_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False
    max_per_image = 100

    Dataset_eval = Damage_eval
    dataset = Dataset_eval(cfg.data_dir, split='train',
                           test_scales=cfg.test_scales, test_flip=cfg.test_flip)  # split test
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                              shuffle=False, num_workers=1, pin_memory=True,
                                              collate_fn=dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        model = get_pose_net_resdcn(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'resnet':
        model = get_pose_net(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'res_CBAM':
        model = get_pose_net_resnet_CBAM(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'resnet_PAM':
        model = get_pose_net_resnet_PAM(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'resnet_SE':
        model = get_pose_net_resnet_SE(num_layers=18, head_conv=64, num_classes=3)

    def Evaluate(epoch, model):
        print('\n Evaluate@Epoch: %d' % epoch)
        start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
        print('Start time %s Seconds' % start_time)
        model.eval()
        torch.cuda.empty_cache()
        max_per_image = 100

        results = {}
        with torch.no_grad():
            for inputs in data_loader:
                img_id, inputs, img_path = inputs[0]
                detections = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)  # (1, 3)
                    output = model(inputs[scale]['image'])[-1]  # hmap, regs, pxpy
                    # torch.cat([bboxes, scores, clses], dim=2)
                    dets = ctdet_decode(*output, K=cfg.test_topk)
                    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

                    top_preds = {}
                    dets[:, :2] = transform_preds(
                        dets[:, 0:2], inputs[scale]['center'], inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    dets[:, 2:4] = transform_preds(
                        dets[:, 2:4], inputs[scale]['center'], inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    clses = dets[:, -1]
                    for j in range(dataset.num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                        top_preds[j + 1][:, :4] /= scale

                    detections.append(top_preds)

                bbox_and_scores = {j: np.concatenate([d[j] for d in detections], axis=0)
                                   for j in range(1, dataset.num_classes + 1)}
                scores = np.hstack([bbox_and_scores[j][:, 4]
                                    for j in range(1, dataset.num_classes + 1)])
                if len(scores) > max_per_image:
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, dataset.num_classes + 1):
                        keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                        bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

                results[img_id] = bbox_and_scores

        end_time = time.perf_counter()
        eval_results = dataset.run_eval(results, save_dir=cfg.ckpt_dir)
        print(eval_results)
        print('End time %s Seconds' % end_time)
        Run_time = end_time - start_time
        FPS = 100 / Run_time  # replace 100 with the number of images
        print('FPS %s ' % FPS)
        # summary_writer.add_scalar('Evaluate_mAP/mAP', eval_results[0], epoch)
        return eval_results[0]

    num_epochs = 60  # replace 60 with the number of epochs
    Max_mAP = 0
    for epoch in range(1, num_epochs + 1):
        cfg.pretrain_dir = os.path.join(cfg.ckpt_dir,
                                        'checkpoint_epoch' + str(epoch) + '.t7')  # the address
        model = load_model(model, cfg.pretrain_dir)
        model = model.to(cfg.device)
        mAP = Evaluate(epoch, model)
        if mAP > Max_mAP:
            Max_mAP = mAP
            print('Max_AP=%s' % Max_mAP)
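# load_model above restores a checkpoint into a (possibly DataParallel-
# wrapped) network. Its source is not shown in this file; a minimal sketch of
# the usual pattern, assuming the checkpoint is a plain state_dict and that
# only the 'module.' prefix needs reconciling (the repo's version may also
# remap devices or filter mismatched shapes):
def load_model_sketch(model, ckpt_path):
    state_dict = torch.load(ckpt_path, map_location='cpu')
    # align the 'module.' prefix between checkpoint and model
    wrapped = hasattr(model, 'module')
    fixed = {}
    for k, v in state_dict.items():
        has_prefix = k.startswith('module.')
        if wrapped and not has_prefix:
            fixed['module.' + k] = v
        elif not wrapped and has_prefix:
            fixed[k[len('module.'):]] = v
        else:
            fixed[k] = v
    model.load_state_dict(fixed)
    return model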
def main():
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    num_gpus = torch.cuda.device_count()
    if cfg.dist:
        device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        dist.init_process_group(backend='nccl', init_method='env://',
                                world_size=num_gpus, rank=cfg.local_rank)
    else:
        device = torch.device('cuda')

    print('==> Preparing data..')
    train_dataset = ImgNet_split(root=os.path.join(cfg.data_dir, 'train'),
                                 transform=imgnet_transform(is_training=True))
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        shuffle=not cfg.dist, num_workers=cfg.num_workers,
        sampler=train_sampler if cfg.dist else None)

    val_dataset = ImgNet_split(root=os.path.join(cfg.data_dir, 'val'),
                               transform=imgnet_transform(is_training=False))
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=cfg.batch_size,
                                             shuffle=False, num_workers=cfg.num_workers)

    print('==> Building model..')
    genotype = torch.load(os.path.join(cfg.ckpt_dir, 'genotype.pickle'))['genotype']
    model = NetworkImageNet(genotype, cfg.init_ch, cfg.num_cells, cfg.auxiliary,
                            num_classes=1000)

    if not cfg.dist:
        model = nn.DataParallel(model).to(device)
    else:
        # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = model.to(device)
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[cfg.local_rank, ],
                                                    output_device=cfg.local_rank)

    optimizer = torch.optim.SGD(model.parameters(), cfg.lr,
                                momentum=0.9, weight_decay=cfg.wd)
    criterion = CrossEntropyLabelSmooth(num_classes=1000,
                                        epsilon=cfg.label_smooth).to(device)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.97)
    warmup = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=2)

    # Training
    def train(epoch):
        model.train()
        start_time = time.time()
        for batch_idx, (inputs, targets) in enumerate(train_loader):
            inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)

            outputs, outputs_aux = model(inputs)
            loss = criterion(outputs, targets)
            loss_aux = criterion(outputs_aux, targets)
            loss += cfg.auxiliary * loss_aux

            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer.step()

            if batch_idx % cfg.log_interval == 0:
                step = len(train_loader) * epoch + batch_idx
                duration = time.time() - start_time
                print('[%d/%d - %d/%d] cls_loss= %.5f (%d samples/sec)' %
                      (epoch, cfg.max_epochs, batch_idx, len(train_loader),
                       loss.item(), cfg.batch_size * cfg.log_interval / duration))
                start_time = time.time()
                summary_writer.add_scalar('cls_loss', loss.item(), step)
                summary_writer.add_scalar('learning rate',
                                          optimizer.param_groups[0]['lr'], step)

    def val(epoch):
        # switch to evaluate mode
        model.eval()
        top1 = 0
        top5 = 0
        with torch.no_grad():
            for i, (inputs, targets) in enumerate(val_loader):
                inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
                output, _ = model(inputs)

                # measure top-1/top-5 accuracy
                _, pred = output.data.topk(5, dim=1, largest=True, sorted=True)
                pred = pred.t()
                correct = pred.eq(targets.view(1, -1).expand_as(pred))
                top1 += correct[:1].view(-1).float().sum(0, keepdim=True).item()
                top5 += correct[:5].view(-1).float().sum(0, keepdim=True).item()

        top1 *= 100 / len(val_dataset)
        top5 *= 100 / len(val_dataset)
        print(' Precision@1 ==> %.2f%% Precision@5: %.2f%%\n' % (top1, top5))
        summary_writer.add_scalar('Precision@1', top1, epoch)
        summary_writer.add_scalar('Precision@5', top5, epoch)
        return

    for epoch in range(cfg.max_epochs):
        print('\nEpoch: %d lr: %.5f drop_path_prob: %.3f' %
              (epoch, scheduler.get_lr()[0], cfg.drop_path_prob * epoch / cfg.max_epochs))
        model.module.drop_path_prob = cfg.drop_path_prob * epoch / cfg.max_epochs
        train_sampler.set_epoch(epoch)
        train(epoch)
        val(epoch)
        if epoch < 5:
            warmup.step(epoch)
        else:
            scheduler.step(epoch)  # move to here after pytorch1.1.0
        print(model.module.genotype())

        if cfg.local_rank == 0:
            torch.save(model.state_dict(), os.path.join(cfg.ckpt_dir, 'checkpoint.t7'))

    summary_writer.close()
    count_parameters(model)
    count_flops(model, input_size=224)
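# CrossEntropyLabelSmooth mixes the one-hot target with a uniform distribution
# over classes before taking the cross entropy. A sketch of the implementation
# commonly used in DARTS-derived ImageNet code (the repo's own class may
# differ in detail):
class CrossEntropyLabelSmoothSketch(nn.Module):
    def __init__(self, num_classes, epsilon):
        super().__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets):
        log_probs = self.logsoftmax(inputs)  # [B, C]
        one_hot = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
        smoothed = (1 - self.epsilon) * one_hot + self.epsilon / self.num_classes
        return (-smoothed * log_probs).mean(0).sum()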
def main():
    saver = create_saver(cfg.local_rank, save_dir=cfg.ckpt_dir)
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    torch.manual_seed(317)
    torch.backends.cudnn.benchmark = True  # disable this if OOM at beginning of training

    num_gpus = torch.cuda.device_count()
    if cfg.dist:
        cfg.device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        dist.init_process_group(backend='nccl', init_method='env://',
                                world_size=num_gpus, rank=cfg.local_rank)
    else:
        cfg.device = torch.device('cuda')

    print('Setting up data...')
    Dataset = KAIST
    train_dataset = Dataset(cfg.data_dir, 'train', img_size=cfg.img_size)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        shuffle=not cfg.dist, num_workers=cfg.num_workers,
        pin_memory=True, drop_last=True,
        sampler=train_sampler if cfg.dist else None)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    else:
        raise NotImplementedError

    if cfg.dist:
        # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = model.to(cfg.device)
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[cfg.local_rank, ],
                                                    output_device=cfg.local_rank)
    else:
        model = nn.DataParallel(model).to(cfg.device)

    if os.path.exists(cfg.pretrain_dir):
        model.module.load_state_dict(torch.load(cfg.pretrain_dir))
        print('loaded pretrained model from %s !' % cfg.pretrain_dir)

    optimizer = torch.optim.Adam(model.parameters(), cfg.lr)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, cfg.lr_step, gamma=0.1)

    def train(epoch):
        print('\n%s Epoch: %d' % (datetime.now(), epoch))
        model.train()
        tic = time.perf_counter()
        epoch_start = True
        for batch_idx, batch in enumerate(train_loader):
            for k in batch:
                batch[k] = batch[k].to(device=cfg.device, non_blocking=True)

            outputs = model((batch['img_rgb'], batch['img_ir']))
            hmap_tl, hmap_br, embd_tl, embd_br, regs_tl, regs_br = zip(*outputs)

            embd_tl = [_tranpose_and_gather_feature(e, batch['inds_tl']) for e in embd_tl]
            embd_br = [_tranpose_and_gather_feature(e, batch['inds_br']) for e in embd_br]
            regs_tl = [_tranpose_and_gather_feature(r, batch['inds_tl']) for r in regs_tl]
            regs_br = [_tranpose_and_gather_feature(r, batch['inds_br']) for r in regs_br]

            focal_loss = _neg_loss(hmap_tl, batch['hmap_tl']) + \
                         _neg_loss(hmap_br, batch['hmap_br'])
            reg_loss = _reg_loss(regs_tl, batch['regs_tl'], batch['ind_masks']) + \
                       _reg_loss(regs_br, batch['regs_br'], batch['ind_masks'])
            pull_loss, push_loss = _ae_loss(embd_tl, embd_br, batch['ind_masks'])
            loss = focal_loss + 0.1 * pull_loss + 0.1 * push_loss + reg_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % cfg.log_interval == 0:
                duration = time.perf_counter() - tic
                tic = time.perf_counter()
                print('[%d/%d-%d/%d] ' % (epoch, cfg.num_epochs, batch_idx, len(train_loader)) +
                      ' focal_loss= %.5f pull_loss= %.5f push_loss= %.5f reg_loss= %.5f' %
                      (focal_loss.item(), pull_loss.item(), push_loss.item(), reg_loss.item()) +
                      ' (%d samples/sec)' % (cfg.batch_size * cfg.log_interval / duration))

                step = len(train_loader) * epoch + batch_idx
                summary_writer.add_scalar('focal_loss', focal_loss.item(), step)
                summary_writer.add_scalar('pull_loss', pull_loss.item(), step)
                summary_writer.add_scalar('push_loss', push_loss.item(), step)
                summary_writer.add_scalar('reg_loss', reg_loss.item(), step)
        return

    print('Starting training...')
    for epoch in range(1, cfg.num_epochs + 1):
        train_sampler.set_epoch(epoch)
        train(epoch)
        print(saver.save(model.state_dict(), 'checkpoint'))
        lr_scheduler.step(epoch)

    summary_writer.close()
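# _ae_loss implements CornerNet's associative-embedding grouping loss: a pull
# term draws the two corner embeddings of the same object toward their mean,
# and a push term separates the means of different objects by a margin of 1.
# A simplified sketch of the idea, assuming a float-valued mask; the repo's
# version differs in normalization details:
import torch.nn.functional as F

def ae_loss_sketch(embd_tl, embd_br, mask):
    # embd_tl, embd_br: [B, K, 1] embeddings at GT corner locations; mask: [B, K]
    e_tl, e_br = embd_tl.squeeze(-1), embd_br.squeeze(-1)
    num = mask.sum(dim=1, keepdim=True).float() + 1e-4  # objects per image
    e_mean = (e_tl + e_br) / 2
    # pull: both corners of each object toward the object's mean embedding
    pull = (((e_tl - e_mean) ** 2 + (e_br - e_mean) ** 2) * mask).sum() / num.sum()
    # push: means of different objects should be at least 1 apart
    dist = (e_mean.unsqueeze(1) - e_mean.unsqueeze(2)).abs()  # [B, K, K]
    pair_mask = mask.unsqueeze(1) * mask.unsqueeze(2)         # valid object pairs
    pair_mask = pair_mask * (1 - torch.eye(mask.size(1), device=mask.device))
    push = (F.relu(1 - dist) * pair_mask).sum() / (num * (num - 1) + 1e-4).sum()
    return pull, push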
def main():
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    num_gpus = torch.cuda.device_count()
    if cfg.dist:
        device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        dist.init_process_group(backend='nccl', init_method='env://',
                                world_size=num_gpus, rank=cfg.local_rank)
    else:
        device = torch.device('cuda')

    print('==> Preparing data..')
    cifar = 100 if 'cifar100' in cfg.log_name else 10
    train_dataset = CIFAR_split(cifar=cifar, root=cfg.data_dir, split='train', ratio=1.0,
                                transform=cifar_search_transform(is_training=True,
                                                                 cutout=cfg.cutout))
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        shuffle=not cfg.dist, num_workers=cfg.num_workers,
        sampler=train_sampler if cfg.dist else None)

    test_dataset = CIFAR_split(cifar=cifar, root=cfg.data_dir, split='test',
                               transform=cifar_search_transform(is_training=False))
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=cfg.batch_size,
                                              shuffle=False, num_workers=cfg.num_workers)

    print('==> Building model..')
    print(os.path.join(cfg.ckpt_dir, 'seed-14880-best-genotype.pth'))
    # genotype = torch.load(os.path.join(cfg.ckpt_dir, 'seed-14880-best-genotype.pth'))
    genotype = seed14880
    model = NetworkCIFAR(genotype, cfg.init_ch, cfg.num_cells, cfg.auxiliary,
                         num_classes=cifar)

    if not cfg.dist:
        model = nn.DataParallel(model).to(device)
    else:
        # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = model.to(device)
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[cfg.local_rank, ],
                                                    output_device=cfg.local_rank)

    optimizer = torch.optim.SGD(model.parameters(), cfg.lr,
                                momentum=0.9, weight_decay=cfg.wd)
    criterion = nn.CrossEntropyLoss().to(device)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, cfg.max_epochs)

    # Training
    def train(epoch):
        model.train()
        start_time = time.time()
        for batch_idx, (inputs, targets) in enumerate(train_loader):
            inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)  # very important

            outputs, outputs_aux = model(inputs)
            loss = criterion(outputs, targets)
            loss_aux = criterion(outputs_aux, targets)
            loss += cfg.auxiliary * loss_aux

            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer.step()

            if batch_idx % cfg.log_interval == 0:
                step = len(train_loader) * epoch + batch_idx
                duration = time.time() - start_time
                print('[%d/%d - %d/%d] cls_loss= %.5f (%d samples/sec)' %
                      (epoch, cfg.max_epochs, batch_idx, len(train_loader),
                       loss.item(), cfg.batch_size * cfg.log_interval / duration))
                start_time = time.time()
                summary_writer.add_scalar('cls_loss', loss.item(), step)
                summary_writer.add_scalar('learning rate',
                                          optimizer.param_groups[0]['lr'], step)

    def test(epoch):
        model.eval()
        correct = 0
        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(test_loader):
                inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
                outputs, _ = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                correct += predicted.eq(targets.data).cpu().sum().item()

        acc = 100. * correct / len(test_loader.dataset)
        print(' Precision@1 ==> %.2f%% \n' % acc)
        summary_writer.add_scalar('Precision@1', acc, global_step=epoch)
        return

    for epoch in range(cfg.max_epochs):
        print('\nEpoch: %d lr: %.5f drop_path_prob: %.3f' %
              (epoch, scheduler.get_lr()[0], cfg.drop_path_prob * epoch / cfg.max_epochs))
        model.module.drop_path_prob = cfg.drop_path_prob * epoch / cfg.max_epochs
        train_sampler.set_epoch(epoch)
        train(epoch)
        test(epoch)
        scheduler.step(epoch)  # move to here after pytorch1.1.0
        # print(model.module.genotype())

        if cfg.local_rank == 0:
            torch.save(model.state_dict(), os.path.join(cfg.ckpt_dir, 'checkpoint.t7'))

    summary_writer.close()
    count_parameters(model)
    count_flops(model, input_size=32)
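# drop_path_prob, scheduled linearly in the loop above, controls DARTS-style
# stochastic depth: during training each cell's residual branch is zeroed with
# that probability and rescaled otherwise. A sketch of the standard drop_path
# helper from DARTS-derived code, shown for reference (the repo's own helper
# is assumed to be equivalent):
def drop_path_sketch(x, drop_prob):
    if drop_prob > 0.:
        keep_prob = 1. - drop_prob
        # one Bernoulli draw per sample, broadcast over C/H/W
        mask = torch.bernoulli(
            torch.full((x.size(0), 1, 1, 1), keep_prob, device=x.device))
        x = x.div(keep_prob).mul(mask)
    return x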
def main():
    logger = create_logger(save_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False
    max_per_image = 100

    Dataset_eval = Damage_eval  # your own dataset: Crack / RE / Spalling
    dataset = Dataset_eval(cfg.data_dir, split='val',
                           test_scales=cfg.test_scales, test_flip=cfg.test_flip)  # split test
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                              shuffle=False, num_workers=1, pin_memory=True,
                                              collate_fn=dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        model = get_pose_net_resdcn(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'resnet':
        model = get_pose_net(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'res_CBAM':
        model = get_pose_net_resnet_CBAM(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'resnet_PAM':
        model = get_pose_net_resnet_PAM(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'resnet_SE':
        model = get_pose_net_resnet_SE(num_layers=18, head_conv=64, num_classes=3)

    model = load_model(model, cfg.pretrain_dir)
    model = model.to(cfg.device)
    model.eval()

    results = {}
    with torch.no_grad():
        for inputs in tqdm(data_loader):
            img_id, inputs, img_path = inputs[0]
            print('id %s' % img_id)
            detections = []
            for scale in inputs:
                inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)
                output = model(inputs[scale]['image'])[-1]
                dets = ctdet_decode(*output, K=cfg.test_topk)
                dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

                top_preds = {}
                dets[:, :2] = transform_preds(
                    dets[:, 0:2], inputs[scale]['center'], inputs[scale]['scale'],
                    (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                dets[:, 2:4] = transform_preds(
                    dets[:, 2:4], inputs[scale]['center'], inputs[scale]['scale'],
                    (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                cls = dets[:, -1]
                for j in range(dataset.num_classes):
                    inds = (cls == j)
                    top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                    top_preds[j + 1][:, :4] /= scale

                detections.append(top_preds)

            bbox_and_scores = {}
            for j in range(1, dataset.num_classes + 1):
                bbox_and_scores[j] = np.concatenate([d[j] for d in detections], axis=0)
                if len(dataset.test_scales) > 1:
                    soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
            scores = np.hstack([bbox_and_scores[j][:, 4]
                                for j in range(1, dataset.num_classes + 1)])
            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, dataset.num_classes + 1):
                    keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                    bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            images_test = cv2.imread(img_path)
            fig = plt.figure(0)
            colors = COCO_COLORS
            names = COCO_NAMES
            # cv2.imwrite('E:/test1.png', images_test)
            plt.imshow(cv2.cvtColor(images_test, cv2.COLOR_BGR2RGB))
            for lab in bbox_and_scores:
                for boxes in bbox_and_scores[lab]:
                    x1, y1, x2, y2, score = boxes
                    # clamp boxes to the 512 x 512 canvas
                    if x1 < 0:
                        x1 = 0
                    if y1 < 0:
                        y1 = 0
                    if x2 > 511:
                        x2 = 511
                    if y2 > 511:
                        y2 = 511
                    if score > 0.2:
                        plt.gca().add_patch(Rectangle((x1, y1), x2 - x1, y2 - y1,
                                                      linewidth=2, edgecolor=colors[lab],
                                                      facecolor='none'))
                        plt.text(x1 - 12, y1 - 12, names[lab],
                                 bbox=dict(facecolor=colors[lab], alpha=0.5),
                                 fontsize=7, color='k')

            fig.patch.set_visible(False)
            Save_dir = 'data/damage/Predict_images'  # save images here
            Image_name = img_path[-10:]
            Save_dir = os.path.join(Save_dir, Image_name)
            plt.axis('off')
            plt.savefig(Save_dir, dpi=400, transparent=True,
                        bbox_inches="tight", pad_inches=0.1)  # save the figure
            plt.close(0)
            results[img_id] = bbox_and_scores

    eval_results = dataset.run_eval(results, cfg.ckpt_dir)
    print(eval_results)
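# The four clamping ifs above can be collapsed into one vectorized call; a
# small equivalent sketch, assuming the same 512 x 512 canvas as the script:
import numpy as np

def clamp_box(box, size=512):
    # box: [x1, y1, x2, y2]; keep all coordinates inside [0, size - 1]
    return np.clip(box, 0, size - 1)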
def main():
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    num_gpus = torch.cuda.device_count()
    if cfg.dist:
        device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        dist.init_process_group(backend='nccl', init_method='env://',
                                world_size=num_gpus, rank=cfg.local_rank)
    else:
        device = torch.device('cuda')

    print('==> Preparing data..')
    cifar = 100 if 'cifar100' in cfg.log_name else 10
    train_dataset = CIFAR_split(cifar=cifar, root=cfg.data_dir, split='train', ratio=0.5,
                                transform=cifar_search_transform(is_training=True))
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        shuffle=not cfg.dist, num_workers=cfg.num_workers,
        sampler=train_sampler if cfg.dist else None)

    val_dataset = CIFAR_split(cifar=cifar, root=cfg.data_dir, split='val', ratio=0.5,
                              transform=cifar_search_transform(is_training=False))
    val_sampler = torch.utils.data.distributed.DistributedSampler(
        val_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        shuffle=not cfg.dist, num_workers=cfg.num_workers,
        sampler=val_sampler if cfg.dist else None)

    print('==> Building model..')
    model = Network(C=cfg.init_ch, num_cells=cfg.num_cells, num_nodes=cfg.num_nodes,
                    multiplier=cfg.num_nodes, num_classes=cifar)
    if not cfg.dist:
        model = nn.DataParallel(model).to(device)
    else:
        # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = model.to(device)
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[cfg.local_rank, ],
                                                    output_device=cfg.local_rank)

    # proxy_model is used for the 2nd-order update
    if cfg.order == '2nd':
        proxy_model = Network(cfg.init_ch, cfg.num_cells, cfg.num_nodes).cuda()

    count_parameters(model)

    weights = [v for k, v in model.named_parameters() if 'alpha' not in k]
    alphas = [v for k, v in model.named_parameters() if 'alpha' in k]

    optimizer_w = optim.SGD(weights, cfg.w_lr, momentum=0.9, weight_decay=cfg.w_wd)
    optimizer_a = optim.Adam(alphas, lr=cfg.a_lr, betas=(0.5, 0.999), weight_decay=cfg.a_wd)
    criterion = nn.CrossEntropyLoss().cuda()
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_w, cfg.max_epochs,
                                                     eta_min=cfg.w_min_lr)

    # note: this rebinds `alphas` from the parameter list above to a per-epoch
    # history of the architecture weights
    alphas = []

    def train(epoch):
        model.train()
        print('\nEpoch: %d lr: %f' % (epoch, scheduler.get_lr()[0]))
        alphas.append([])
        start_time = time.time()
        for batch_idx, ((inputs_w, targets_w), (inputs_a, targets_a)) \
                in enumerate(zip(train_loader, val_loader)):
            inputs_w, targets_w = inputs_w.to(device), targets_w.to(device, non_blocking=True)
            inputs_a, targets_a = inputs_a.to(device), targets_a.to(device, non_blocking=True)

            # 1. update alpha
            if epoch > cfg.a_start:
                optimizer_a.zero_grad()
                if cfg.order == '1st':
                    # using 1st-order update
                    outputs = model(inputs_a)
                    val_loss = criterion(outputs, targets_a)
                    val_loss.backward()
                else:
                    # using 2nd-order update
                    val_loss = update(model, proxy_model, criterion, optimizer_w,
                                      inputs_a, targets_a, inputs_w, targets_w)
                optimizer_a.step()
            else:
                val_loss = torch.tensor([0]).cuda()

            # 2. update weights
            outputs = model(inputs_w)
            cls_loss = criterion(outputs, targets_w)
            optimizer_w.zero_grad()
            cls_loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer_w.step()

            if batch_idx % cfg.log_interval == 0:
                step = len(train_loader) * epoch + batch_idx
                duration = time.time() - start_time
                print('[%d/%d - %d/%d] cls_loss: %5f val_loss: %5f (%d samples/sec)' %
                      (epoch, cfg.max_epochs, batch_idx, len(train_loader),
                       cls_loss.item(), val_loss.item(),
                       cfg.batch_size * cfg.log_interval / duration))
                start_time = time.time()
                summary_writer.add_scalar('cls_loss', cls_loss.item(), step)
                summary_writer.add_scalar('val_loss', val_loss.item(), step)
                summary_writer.add_scalar('learning rate',
                                          optimizer_w.param_groups[0]['lr'], step)

        # snapshot the architecture weights once per epoch
        alphas[-1].append(model.module.alpha_normal.detach().cpu().numpy())
        alphas[-1].append(model.module.alpha_reduce.detach().cpu().numpy())
        return

    def eval(epoch):
        model.eval()
        correct = 0
        total_loss = 0
        with torch.no_grad():
            for step, (inputs, targets) in enumerate(val_loader):
                inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
                outputs = model(inputs)
                total_loss += criterion(outputs, targets).item()
                _, predicted = torch.max(outputs.data, 1)
                correct += predicted.eq(targets.data).cpu().sum().item()

        acc = 100. * correct / len(val_loader.dataset)
        total_loss = total_loss / len(val_loader)
        print('Val_loss==> %.5f Precision@1 ==> %.2f%% \n' % (total_loss, acc))
        summary_writer.add_scalar('Precision@1', acc, global_step=epoch)
        summary_writer.add_scalar('val_loss_per_epoch', total_loss, global_step=epoch)
        return

    for epoch in range(cfg.max_epochs):
        train_sampler.set_epoch(epoch)
        val_sampler.set_epoch(epoch)
        train(epoch)
        eval(epoch)
        scheduler.step(epoch)  # move to here after pytorch1.1.0
        print(model.module.genotype())
        if cfg.local_rank == 0:
            torch.save(alphas, os.path.join(cfg.ckpt_dir, 'alphas.t7'))
            torch.save(model.state_dict(),
                       os.path.join(cfg.ckpt_dir, 'search_checkpoint.t7'))
            torch.save({'genotype': model.module.genotype()},
                       os.path.join(cfg.ckpt_dir, 'genotype.t7'))

    summary_writer.close()
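# count_parameters and count_flops report model size; their definitions are
# not shown in this file. A minimal sketch of a parameter counter with the
# same calling convention (assumed, for illustration):
def count_parameters_sketch(model):
    n = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('Total trainable params: %.2fM' % (n / 1e6))
    return n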
def main():
    logger = create_logger(save_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False
    max_per_image = 100

    Dataset_eval = COCO_eval if cfg.dataset == 'coco' else PascalVOC_eval
    dataset = Dataset_eval(cfg.data_dir, split='val', img_size=cfg.img_size,
                           test_scales=cfg.test_scales, test_flip=cfg.test_flip)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                              shuffle=False, num_workers=1, pin_memory=False,
                                              collate_fn=dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]),
                             num_classes=dataset.num_classes)
    else:
        raise NotImplementedError

    model = load_model(model, cfg.pretrain_dir)
    model = model.to(cfg.device)
    model.eval()

    results = {}
    with torch.no_grad():
        for inputs in data_loader:
            img_id, inputs = inputs[0]
            detections = []
            for scale in inputs:
                inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)
                output = model(inputs[scale]['image'])[-1]
                dets = ctdet_decode(*output, K=cfg.test_topk)
                dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

                top_preds = {}
                dets[:, :2] = transform_preds(
                    dets[:, 0:2], inputs[scale]['center'], inputs[scale]['scale'],
                    (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                dets[:, 2:4] = transform_preds(
                    dets[:, 2:4], inputs[scale]['center'], inputs[scale]['scale'],
                    (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                cls = dets[:, -1]
                for j in range(dataset.num_classes):
                    inds = (cls == j)
                    top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                    top_preds[j + 1][:, :4] /= scale

                detections.append(top_preds)

            bbox_and_scores = {}
            for j in range(1, dataset.num_classes + 1):
                bbox_and_scores[j] = np.concatenate([d[j] for d in detections], axis=0)
                if len(dataset.test_scales) > 1:
                    soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
            scores = np.hstack([bbox_and_scores[j][:, 4]
                                for j in range(1, dataset.num_classes + 1)])
            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, dataset.num_classes + 1):
                    keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                    bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            results[img_id] = bbox_and_scores

    eval_results = dataset.run_eval(results, cfg.ckpt_dir)
    print(eval_results)
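# transform_preds maps feature-map coordinates back to original-image
# coordinates. The repo's version goes through an inverse affine transform; a
# simplified sketch of the same mapping, valid under the assumption of no
# rotation and a uniform scale (a crop of side `scale` centered at `center`,
# resized to output_size):
import numpy as np

def transform_preds_sketch(coords, center, scale, output_size):
    # coords: [N, 2] points on the feature map; output_size: (fmap_w, fmap_h)
    w, h = output_size
    out = coords.astype(np.float64).copy()
    out[:, 0] = coords[:, 0] * (scale / w) + (center[0] - scale / 2)
    out[:, 1] = coords[:, 1] * (scale / h) + (center[1] - scale / 2)
    return out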
def main():
    saver = create_saver(cfg.local_rank, save_dir=cfg.ckpt_dir)
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    torch.manual_seed(317)
    torch.backends.cudnn.benchmark = True  # disable this if OOM at beginning of training

    num_gpus = torch.cuda.device_count()
    if cfg.dist:
        cfg.device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        dist.init_process_group(backend='nccl', init_method='env://',
                                world_size=num_gpus, rank=cfg.local_rank)
    else:
        cfg.device = torch.device('cuda')

    print('Setting up data...')
    Dataset = COCO if cfg.dataset == 'coco' else PascalVOC
    train_dataset = Dataset(cfg.data_dir, 'train', split_ratio=cfg.split_ratio,
                            img_size=cfg.img_size)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        shuffle=not cfg.dist,
        num_workers=cfg.num_workers,
        pin_memory=True,
        drop_last=True,
        sampler=train_sampler if cfg.dist else None)

    Dataset_eval = COCO_eval if cfg.dataset == 'coco' else PascalVOC_eval
    val_dataset = Dataset_eval(cfg.data_dir, 'val', test_scales=[1.], test_flip=False)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1,
                                             shuffle=False, num_workers=1,
                                             pin_memory=True,
                                             collate_fn=val_dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]),
                             num_classes=train_dataset.num_classes)
    else:
        raise NotImplementedError

    if cfg.dist:
        # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = model.to(cfg.device)
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[cfg.local_rank, ],
                                                    output_device=cfg.local_rank)
    else:
        model = nn.DataParallel(model).to(cfg.device)

    if os.path.isfile(cfg.pretrain_dir):
        model = load_model(model, cfg.pretrain_dir)

    optimizer = torch.optim.Adam(model.parameters(), cfg.lr)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, cfg.lr_step, gamma=0.1)

    def train(epoch):
        print('\n Epoch: %d' % epoch)
        model.train()
        tic = time.perf_counter()
        for batch_idx, batch in enumerate(train_loader):
            for k in batch:
                if k != 'meta':
                    batch[k] = batch[k].to(device=cfg.device, non_blocking=True)
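# The loaders above set pin_memory=True and the loop moves each tensor with
# non_blocking=True; together these let the host-to-device copy overlap with
# GPU compute instead of blocking on it. A minimal sketch of the pattern
# (the helper name is hypothetical, not part of this codebase):
import torch

def to_device(batch, device):
    # with pinned (page-locked) CPU tensors, non_blocking=True makes .to()
    # an asynchronous copy on the current CUDA stream
    return {k: v.to(device, non_blocking=True) if torch.is_tensor(v) else v
            for k, v in batch.items()}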
def main():
    saver = create_saver(cfg.local_rank, save_dir=cfg.ckpt_dir)
    logger = create_logger(cfg.local_rank, save_dir=cfg.log_dir)
    summary_writer = create_summary(cfg.local_rank, log_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    torch.manual_seed(300)
    torch.backends.cudnn.benchmark = True
    '''
    # You can also set it up like this; the random seed will then be fixed.
    torch.manual_seed(350)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True  # consistent results on the cpu and gpu
    '''

    num_gpus = torch.cuda.device_count()
    if cfg.dist:
        cfg.device = torch.device('cuda:%d' % cfg.local_rank)
        torch.cuda.set_device(cfg.local_rank)
        dist.init_process_group(backend='nccl', init_method='env://',
                                world_size=num_gpus, rank=cfg.local_rank)
    else:
        cfg.device = torch.device('cuda')

    print('Setting up data...')
    Dataset = Damage
    train_dataset = Dataset(cfg.data_dir, 'train', split_ratio=cfg.split_ratio,
                            img_size=cfg.img_size)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=num_gpus, rank=cfg.local_rank)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.batch_size // num_gpus if cfg.dist else cfg.batch_size,
        shuffle=not cfg.dist,
        num_workers=cfg.num_workers,
        pin_memory=True,
        drop_last=True,
        sampler=train_sampler if cfg.dist else None)

    Dataset_eval = Damage_eval
    test_dataset = Dataset_eval(cfg.data_dir, 'test', test_scales=[1.], test_flip=False)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=1,    # test-set batch_size
        shuffle=False,
        num_workers=1,   # test-set num_workers
        pin_memory=True,
        collate_fn=test_dataset.collate_fn)
    val_dataset = Dataset_eval(cfg.data_dir, 'val', test_scales=[1.], test_flip=False)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=1,    # validation-set batch_size
        shuffle=False,
        num_workers=1,   # validation-set num_workers
        pin_memory=True,
        collate_fn=val_dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        model = get_pose_net_resdcn(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'resnet':
        model = get_pose_net(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'resnet_CBAM':
        model = get_pose_net_resnet_CBAM(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'resnet_PAM':
        model = get_pose_net_resnet_PAM(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'resnet_SE':
        model = get_pose_net_resnet_SE(num_layers=18, head_conv=64, num_classes=3)
    else:
        raise NotImplementedError

    if cfg.dist:
        # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = model.to(cfg.device)
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[cfg.local_rank, ],
                                                    output_device=cfg.local_rank)
    else:
        model = nn.DataParallel(model).to(cfg.device)

    # if os.path.isfile(cfg.pretrain_dir):
    #     model = load_model(model, cfg.pretrain_dir)  # do not load a pretrained model

    optimizer = torch.optim.Adam(model.parameters(), cfg.lr)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, cfg.lr_step, gamma=0.1)  # adjust lr

    def train(epoch):
        print('\n Epoch: %d' % epoch)
        model.train()
        tic = time.perf_counter()
        for batch_idx, batch in enumerate(train_loader):
            for k in batch:
                if k != 'meta':
                    batch[k] = batch[k].to(device=cfg.device, non_blocking=True)

            outputs = model(batch['image'])
            hmap, regs, w_h_, pxpy = zip(*outputs)  # batch * C(channel) * W * H
            regs = [_tranpose_and_gather_feature(r, batch['inds']) for r in regs]
            pxpy = [_tranpose_and_gather_feature(r, batch['inds']) for r in pxpy]
            w_h_ = [_tranpose_and_gather_feature(r, batch['inds']) for r in w_h_]  # batch * K * C = batch * 128 * 2

            hmap_loss = _neg_loss(hmap, batch['hmap'])
            reg_loss = _SmoothL1Loss(regs, batch['regs'], batch['ind_masks'])
            pxpy_loss = _reg_loss(pxpy, batch['pxpy'], batch['ind_masks'])
            w_h_loss = _SmoothL1Loss(w_h_, batch['w_h_'], batch['ind_masks'])
            loss = hmap_loss + 10 * reg_loss + 0.1 * w_h_loss + 0.1 * pxpy_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % cfg.log_interval == 0:
                duration = time.perf_counter() - tic
                tic = time.perf_counter()
                print('[%d/%d-%d/%d] ' % (epoch, cfg.num_epochs, batch_idx, len(train_loader)) +
                      ' hmap_loss= %.5f reg_loss= %.5f w_h_loss= %.5f pxpy_loss= %.5f' %
                      (hmap_loss.item(), reg_loss.item(), w_h_loss.item(), pxpy_loss.item()) +
                      ' (%d samples/sec)' % (cfg.batch_size * cfg.log_interval / duration))

                step = len(train_loader) * epoch + batch_idx
                summary_writer.add_scalar('hmap_loss', hmap_loss.item(), step)
                summary_writer.add_scalar('reg_loss', reg_loss.item(), step)
                summary_writer.add_scalar('w_h_loss', w_h_loss.item(), step)
                summary_writer.add_scalar('pxpy_loss', pxpy_loss.item(), step)
        return

    # -------------------- test set -------------------- #
    def test_map(epoch):
        print('\n Test@Epoch: %d' % epoch)
        start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
        print('Start time %s Seconds' % start_time)
        model.eval()
        torch.cuda.empty_cache()
        max_per_image = 100

        results = {}
        with torch.no_grad():
            for inputs in test_loader:
                img_id, inputs, img_path = inputs[0]
                detections = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)  # (1, 3)
                    output = model(inputs[scale]['image'])[-1]

                    dets = ctdet_decode(*output, K=cfg.test_topk)  # torch.cat([bboxes, scores, clses], dim=2)
                    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

                    top_preds = {}
                    dets[:, :2] = transform_preds(dets[:, 0:2],
                                                  inputs[scale]['center'],
                                                  inputs[scale]['scale'],
                                                  (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                                   inputs[scale]['center'],
                                                   inputs[scale]['scale'],
                                                   (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    clses = dets[:, -1]
                    for j in range(test_dataset.num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                        top_preds[j + 1][:, :4] /= scale

                    detections.append(top_preds)

                bbox_and_scores = {j: np.concatenate([d[j] for d in detections], axis=0)
                                   for j in range(1, test_dataset.num_classes + 1)}
                scores = np.hstack([bbox_and_scores[j][:, 4]
                                    for j in range(1, test_dataset.num_classes + 1)])

                if len(scores) > max_per_image:
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, test_dataset.num_classes + 1):
                        keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                        bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

                results[img_id] = bbox_and_scores

        end_time = time.perf_counter()
        eval_results = test_dataset.run_eval(results, save_dir=cfg.ckpt_dir)
        print(eval_results)
        print('End time %s Seconds' % end_time)
        Run_time = end_time - start_time
        FPS = 100 / Run_time  # replace 100 with the number of images
        print('FPS %s ' % FPS)
        summary_writer.add_scalar('test_mAP/mAP', eval_results[0], epoch)
        return eval_results[0]
    # -------------------- end of test set -------------------- #

    # -------------------- validation set -------------------- #
    def val_map(epoch):
        print('\n Val@Epoch: %d' % epoch)
        start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
        print('Start time %s Seconds' % start_time)
        model.eval()
        torch.cuda.empty_cache()
        max_per_image = 100

        results = {}
        with torch.no_grad():
            for inputs in val_loader:
                img_id, inputs, img_path = inputs[0]
                detections = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)  # (1, 3)
                    output = model(inputs[scale]['image'])[-1]  # hmap, regs, w_h_, pxpy

                    dets = ctdet_decode(*output, K=cfg.test_topk)  # torch.cat([bboxes, scores, clses], dim=2)
                    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

                    top_preds = {}
                    dets[:, :2] = transform_preds(dets[:, 0:2],
                                                  inputs[scale]['center'],
                                                  inputs[scale]['scale'],
                                                  (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                                   inputs[scale]['center'],
                                                   inputs[scale]['scale'],
                                                   (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    clses = dets[:, -1]
                    for j in range(val_dataset.num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                        top_preds[j + 1][:, :4] /= scale

                    detections.append(top_preds)

                bbox_and_scores = {j: np.concatenate([d[j] for d in detections], axis=0)
                                   for j in range(1, val_dataset.num_classes + 1)}
                scores = np.hstack([bbox_and_scores[j][:, 4]
                                    for j in range(1, val_dataset.num_classes + 1)])

                if len(scores) > max_per_image:
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, val_dataset.num_classes + 1):
                        keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                        bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

                results[img_id] = bbox_and_scores

        end_time = time.perf_counter()
        eval_results = val_dataset.run_eval(results, save_dir=cfg.ckpt_dir)
        print(eval_results)
        print('End time %s Seconds' % end_time)
        Run_time = end_time - start_time
        FPS = 100 / Run_time  # replace 100 with the number of images
        print('FPS %s ' % FPS)
        summary_writer.add_scalar('val_mAP/mAP', eval_results[0], epoch)
        return eval_results[0]
    # -------------------- end of validation set -------------------- #

    print('Starting training...')
    Max_test_AP = 0  # max test AP
    Max_val_AP = 0   # max validation AP
    flag_epoch = 1
    for epoch in range(1, cfg.num_epochs + 1):
        train_sampler.set_epoch(epoch)
        train(epoch)
        if epoch >= flag_epoch:
            test_mAP = test_map(epoch)
            val_mAP = val_map(epoch)
            if test_mAP > Max_test_AP:
                Max_test_AP = test_mAP
            if val_mAP > Max_val_AP:
                print(saver.save(model.module.state_dict(),
                                 'checkpoint_MaxAP_epoch' + str(epoch)))
                Max_val_AP = val_mAP
        print(saver.save(model.module.state_dict(), 'checkpoint'))  # save the current epoch

        total = sum(param.nelement() for param in model.parameters())  # count parameters
        print("Number of parameters: %.2fM" % (total / 1e6))
        print('Max_test_AP=%s' % Max_test_AP)
        print('Max_val_AP=%s' % Max_val_AP)
        lr_scheduler.step()  # since PyTorch 1.1.0, step the scheduler after optimizer.step()
    summary_writer.close()
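# `_tranpose_and_gather_feature` is used above to pick, for each ground-truth
# center index in batch['inds'], the regression values the network predicted
# at that feature-map location. A typical implementation from CenterNet-style
# codebases is sketched below; treat it as an assumption about this repo's
# helper, not a verbatim copy of it.
import torch

def _tranpose_and_gather_feature(feat, ind):
    # feat: B x C x H x W prediction map; ind: B x K flat indices into H*W
    feat = feat.permute(0, 2, 3, 1).contiguous()      # B x H x W x C
    feat = feat.view(feat.size(0), -1, feat.size(3))  # B x (H*W) x C
    ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), feat.size(2))
    return feat.gather(1, ind)                        # B x K x C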
def main():
    logger = create_logger(save_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    torch.manual_seed(317)
    torch.backends.cudnn.benchmark = False
    cfg.device = torch.device('cuda')

    print('Setting up data...')
    val_dataset = COCO_eval(cfg.data_dir, 'val',
                            test_scales=cfg.test_scales, test_flip=cfg.test_flip)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1,
                                             shuffle=False, num_workers=1,
                                             pin_memory=True,
                                             collate_fn=val_dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    else:
        raise NotImplementedError

    model = model.to(cfg.device)
    model.load_state_dict(torch.load(cfg.pretrain_dir))
    print('loaded pretrained model from %s !' % cfg.pretrain_dir)

    print('validation starts at %s' % datetime.now())
    model.eval()
    results = {}
    with torch.no_grad():
        for inputs in val_loader:
            img_id, inputs = inputs[0]

            detections, centers = [], []
            for scale in inputs:
                inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)
                output = model(inputs[scale]['image'])[-1]
                dets, cts = _decode(*output, ae_threshold=cfg.ae_threshold,
                                    K=cfg.topk, kernel=3)
                dets = dets.reshape(dets.shape[0], -1, 8).detach().cpu().numpy()
                cts = cts.reshape(cts.shape[0], -1, 4).detach().cpu().numpy()
                if dets.shape[0] == 2:
                    # undo the horizontal flip for the mirrored half of the batch
                    dets[1, :, [0, 2]] = inputs[scale]['fmap_size'][0, 1] - dets[1, :, [2, 0]]
                    cts[1, :, [0]] = inputs[scale]['fmap_size'][0, 1] - cts[1, :, [0]]
                dets = dets.reshape(1, -1, 8)
                cts = cts.reshape(1, -1, 4)

                _rescale_dets(dets, cts, inputs[scale]['ratio'],
                              inputs[scale]['border'], inputs[scale]['size'])
                dets[:, :, 0:4] /= scale
                cts[:, :, 0:2] /= scale
                detections.append(dets)
                if scale == 1:
                    centers.append(cts)

            detections = np.concatenate(detections, axis=1)[0]
            centers = np.concatenate(centers, axis=1)[0]
            detections, classes = center_match(detections, centers)

            results[img_id] = {}
            for j in range(val_dataset.num_classes):
                keep_inds = (classes == j)
                results[img_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
                soft_nms_merge(results[img_id][j + 1], Nt=cfg.nms_threshold,
                               method=2, weight_exp=cfg.w_exp)
                # soft_nms(results[img_id][j + 1], Nt=0.5, method=2)
                results[img_id][j + 1] = results[img_id][j + 1][:, 0:5]

            scores = np.hstack([results[img_id][j][:, -1]
                                for j in range(1, val_dataset.num_classes + 1)])
            if len(scores) > val_dataset.max_objs:
                kth = len(scores) - val_dataset.max_objs
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, val_dataset.num_classes + 1):
                    keep_inds = (results[img_id][j][:, -1] >= thresh)
                    results[img_id][j] = results[img_id][j][keep_inds]

    eval_results = val_dataset.run_eval(results, save_dir=cfg.ckpt_dir)
    print(eval_results)
    print('validation ends at %s' % datetime.now())
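# When test_flip is enabled, the model sees the original image and its mirror
# in one two-image batch, and the block above maps boxes from the mirrored
# pass back with x' = W - x, swapping the left and right edges so x1' <= x2'
# still holds. A small numpy check of that identity, with a hypothetical
# feature-map width of 100:
import numpy as np

fmap_w = 100.0
dets = np.array([[20.0, 10.0, 40.0, 30.0]])      # x1, y1, x2, y2 on the flipped map
unflipped = dets.copy()
unflipped[:, [0, 2]] = fmap_w - dets[:, [2, 0]]  # x1' = W - x2, x2' = W - x1
print(unflipped)                                 # [[60. 10. 80. 30.]]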