def __init__(self, args):
    self.args = args
    self.iteration = 0

    # set up dataset and data loader
    self.dataloader = create_loader(args)

    # set up losses and metrics
    self.rec_loss_func = {
        key: getattr(loss_module, key)() for key, val in args.rec_loss.items()}
    self.adv_loss = getattr(loss_module, args.gan_type)()

    # Image generator input: [rgb(3) + mask(1)], discriminator input: [rgb(3)]
    net = importlib.import_module('model.' + args.model)
    self.netG = net.InpaintGenerator(args).cuda()
    self.optimG = torch.optim.Adam(
        self.netG.parameters(), lr=args.lrg, betas=(args.beta1, args.beta2))
    self.netD = net.Discriminator().cuda()
    self.optimD = torch.optim.Adam(
        self.netD.parameters(), lr=args.lrd, betas=(args.beta1, args.beta2))
    self.load()

    if args.distributed:
        # output_device expects a single device index, not a list
        self.netG = DDP(self.netG, device_ids=[args.local_rank],
                        output_device=args.local_rank)
        self.netD = DDP(self.netD, device_ids=[args.local_rank],
                        output_device=args.local_rank)
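# A minimal sketch of the getattr-based loss lookup used in __init__ above,
# assuming a hypothetical loss_module namespace that exposes loss classes by
# name; args.rec_loss is assumed to map class names to loss weights. This is
# an illustration of the pattern, not the repo's actual loss module.
import torch.nn as nn

class loss_module:  # stand-in namespace, for illustration only
    L1 = nn.L1Loss

rec_loss_cfg = {'L1': 1.0}  # name -> weight, as args.rec_loss is assumed to hold
rec_loss_func = {key: getattr(loss_module, key)() for key in rec_loss_cfg}
# a weighted reconstruction loss then reduces to:
# sum(w * rec_loss_func[name](pred, target) for name, w in rec_loss_cfg.items())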
def main():
    args, args_text = _parse_args()
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    if args.distributed:
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()

    model_name = 'efficientdet_d0'
    data_config = get_efficientdet_config(model_name)

    train_anno_set = 'train2017'
    train_annotation_path = os.path.join(args.data, 'annotations', f'instances_{train_anno_set}.json')
    train_image_dir = train_anno_set
    dataset_train = CocoDetection(os.path.join(args.data, train_image_dir),
                                  train_annotation_path, data_config)
    print("Length of training dataset {}".format(len(dataset_train)))

    loader_train = create_loader(
        dataset_train,
        input_size=args.input_size,
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        # re_prob=args.reprob,  # FIXME add back various augmentations
        # re_mode=args.remode,
        # re_count=args.recount,
        # re_split=args.resplit,
        # color_jitter=args.color_jitter,
        # auto_augment=args.aa,
        interpolation=args.train_interpolation,
        # mean=data_config['mean'],
        # std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        # collate_fn=collate_fn,
        pin_mem=args.pin_mem,
    )
    print("Iterations per epoch {}".format(
        math.ceil(len(dataset_train) / (args.batch_size * args.world_size))))

    data_time_m = AverageMeter()
    end = time.time()
    if args.local_rank == 0:
        print("Starting to test...")
    for batch_idx, (input, target) in enumerate(loader_train):
        data_time_m.update(time.time() - end)
        if args.local_rank == 0 and batch_idx % 20 == 0:
            print("batch time till {} is {}".format(batch_idx, data_time_m.avg))
        end = time.time()
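# AverageMeter is used throughout these snippets but never defined in this
# section; the standard running-average helper from the timm examples is
# reproduced below as a sketch so the snippets read self-contained.
class AverageMeter:
    """Tracks the latest value and the running average of a metric."""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count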
def valid(valid_ds):
    model.eval()
    valid_loss = 0.
    valid_loader = create_loader(args, valid_ds)
    for (x, x_len, y) in valid_loader:
        with torch.no_grad():
            loss = criterion(model(x, x_len), y)
        valid_loss += loss.item()
    return valid_loss / len(valid_loader)
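# A minimal sketch of what create_loader(args, ds) might look like for the
# (x, x_len, y) batches used in valid()/test()/train() below, assuming each
# dataset item is a (token_id_tensor, label) pair. pad_collate and the item
# layout are assumptions for illustration, not the original implementation.
import torch
from torch.utils.data import DataLoader

def pad_collate(batch):
    seqs, labels = zip(*batch)
    lengths = torch.tensor([len(s) for s in seqs])
    padded = torch.nn.utils.rnn.pad_sequence(seqs, batch_first=True)
    return padded, lengths, torch.tensor(labels)

def create_loader(args, ds, shuffle=True):
    return DataLoader(ds, batch_size=args.batch_size,
                      shuffle=shuffle, collate_fn=pad_collate)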
def main():
    args = parser.parse_args()
    args.gpu_id = 0

    # set graph optimization level
    sess_options = onnxruntime.SessionOptions()
    sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
    if args.profile:
        sess_options.enable_profiling = True
    if args.onnx_output_opt:
        sess_options.optimized_model_filepath = args.onnx_output_opt

    session = onnxruntime.InferenceSession(args.onnx_input, sess_options)

    data_config = resolve_data_config(None, args)
    loader = create_loader(
        Dataset(args.data, load_bytes=args.tf_preprocessing),
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        use_prefetcher=False,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        crop_pct=data_config['crop_pct'],
        tensorflow_preprocessing=args.tf_preprocessing)

    input_name = session.get_inputs()[0].name

    batch_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    for i, (input, target) in enumerate(loader):
        # run the net and return prediction
        output = session.run([], {input_name: input.data.numpy()})
        output = output[0]

        # measure accuracy and record loss
        prec1, prec5 = accuracy_np(output, target.numpy())
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f}, {rate_avg:.3f}/s, {ms_avg:.3f} ms/sample)\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      i, len(loader), batch_time=batch_time,
                      rate_avg=input.size(0) / batch_time.avg,
                      ms_avg=1000 * batch_time.avg / input.size(0),  # milliseconds per sample
                      top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} ({top1a:.3f}) Prec@5 {top5.avg:.3f} ({top5a:.3f})'.format(
        top1=top1, top1a=100 - top1.avg, top5=top5, top5a=100. - top5.avg))
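# A minimal sketch of the accuracy_np helper used above, assuming logits of
# shape [N, num_classes] (num_classes >= 5) and integer labels of shape [N].
# An illustration of the contract, not necessarily the original code.
import numpy as np

def accuracy_np(output, target):
    top5 = np.argsort(-output, axis=1)[:, :5]  # indices of the 5 largest logits
    top1_acc = (top5[:, 0] == target).mean()
    top5_acc = (top5 == target[:, None]).any(axis=1).mean()
    return 100.0 * top1_acc, 100.0 * top5_acc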
def valid(args):
    model.eval()
    valid_loader = create_loader(args, valid_ds)
    valid_loss = 0.
    for (x, y, sl) in valid_loader:
        with torch.no_grad():
            logits = model(x, sl)
            loss = criterion(logits, y)
        valid_loss += loss.item()
    return valid_loss / len(valid_loader)
def test(test_ds):
    model.eval()
    test_loader = create_loader(args, test_ds, shuffle=False)
    y_true, y_pred = [], []
    for (x, x_len, y) in test_loader:
        with torch.no_grad():
            preds = model(x, x_len).argmax(1)
        for i in range(x.size(0)):
            y_true.append(y[i].item())
            y_pred.append(preds[i].item())
    return classification_report(y_true, y_pred, digits=6)
def test(args):
    model.load_state_dict(torch.load(args.ckpt))
    model.eval()
    test_loader = create_loader(args, test_ds, shuffle=False)
    y_true, y_pred = [], []
    for (x, y, sl) in test_loader:
        with torch.no_grad():
            logits = model(x, sl)  # [B, C]
            preds = logits.argmax(dim=1)
        for i in range(len(x)):
            y_true.append(y[i].item())
            y_pred.append(preds[i].item())
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    return classification_report(y_true, y_pred, digits=4)
def train(args):
    model.train()
    train_loader = create_loader(args, train_ds)
    train_loss = 0.
    global_steps = 0
    optimizer.zero_grad()
    for i, (x, y, sl) in enumerate(train_loader, 1):
        loss = criterion(model(x, sl), y)
        if args.grad_step != -1:
            loss = loss / args.grad_step
        train_loss += loss.item()
        loss.backward()
        if _grad_step(args, i):
            optimizer.step()
            optimizer.zero_grad()
            global_steps += 1
    return train_loss / global_steps
def train(train_ds):
    model.train()
    train_loss = 0.
    global_steps = 0
    train_loader = create_loader(args, train_ds)
    optimizer.zero_grad()
    for i, (x, x_len, y) in enumerate(train_loader, 1):
        loss = criterion(model(x, x_len), y)
        if args.accumulate_grad > 1:
            loss = loss / args.accumulate_grad
        train_loss += loss.item()
        loss.backward()
        if optimizer_step(args, i):
            optimizer.step()
            optimizer.zero_grad()
            global_steps += 1
    return train_loss / global_steps
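# A minimal sketch of the optimizer_step / _grad_step predicates used in the
# two train() variants above: step the optimizer once every N micro-batches,
# assuming args.accumulate_grad (or args.grad_step) holds N. A hypothetical
# helper shown only to make the accumulation logic explicit.
def optimizer_step(args, batch_idx):
    n = getattr(args, 'accumulate_grad', 1)
    if n <= 1:
        return True            # no accumulation: step on every batch
    return batch_idx % n == 0  # step on every N-th micro-batch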
def validate(args):
    setup_default_logging()

    def setthresh():
        if args.checkpoint.split("/")[-1].split("_")[0] in getthresholds.keys():
            return getthresholds[args.checkpoint.split("/")[-1].split("_")[0]]
        else:
            return [args.threshold for _ in range(4)]

    threshs = setthresh()
    print(threshs)

    # might as well try to validate something
    args.pretrained = args.pretrained or not args.checkpoint
    args.prefetcher = not args.no_prefetcher

    # create model
    bench = create_model(
        args.model,
        bench_task='predict',
        pretrained=args.pretrained,
        redundant_bias=args.redundant_bias,
        checkpoint_path=args.checkpoint,
        checkpoint_ema=args.use_ema,
    )
    input_size = bench.config.image_size
    param_count = sum([m.numel() for m in bench.parameters()])
    print('Model %s created, param count: %d' % (args.model, param_count))

    bench = bench.cuda()
    if has_amp:
        print('Using AMP mixed precision.')
        bench = amp.initialize(bench, opt_level='O1')
    else:
        print('AMP not installed, running network in FP32.')

    if args.num_gpu > 1:
        bench = torch.nn.DataParallel(bench, device_ids=list(range(args.num_gpu)))

    if 'test' in args.anno:
        annotation_path = os.path.join(args.data, 'annotations', f'image_info_{args.anno}.json')
        image_dir = args.anno
    else:
        annotation_path = os.path.join(args.data, 'annotations', f'instances_{args.anno}.json')
        image_dir = args.anno
    print(os.path.join(args.data, image_dir), annotation_path)

    dataset = CocoDetection(os.path.join(args.data, image_dir), annotation_path)
    loader = create_loader(dataset,
                           input_size=input_size,
                           batch_size=args.batch_size,
                           use_prefetcher=args.prefetcher,
                           interpolation=args.interpolation,
                           fill_color=args.fill_color,
                           num_workers=args.workers,
                           pin_mem=args.pin_mem,
                           mean=args.mean,
                           std=args.std)

    if 'test' in args.anno:
        threshold = float(args.threshold)
    else:
        threshold = .001

    img_ids = []
    results = []
    bench.eval()
    batch_time = AverageMeter()
    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(loader):
            output = bench(input, target['img_scale'], target['img_size'])
            output = output.cpu()
            sample_ids = target['img_id'].cpu()
            for index, sample in enumerate(output):
                image_id = int(sample_ids[index])
                for det in sample:
                    score = float(det[4])
                    if score < threshold:
                        # stop when below this threshold, scores in descending order
                        coco_det = dict(image_id=image_id, category_id=-1)
                        img_ids.append(image_id)
                        results.append(coco_det)
                        break
                    coco_det = dict(image_id=image_id,
                                    bbox=det[0:4].tolist(),
                                    score=score,
                                    category_id=int(det[5]),
                                    sizes=target['img_size'].tolist()[0])
                    img_ids.append(image_id)
                    results.append(coco_det)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.log_freq == 0:
                print('Test: [{0:>4d}/{1}] '
                      'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) '.format(
                          i, len(loader), batch_time=batch_time,
                          rate_avg=input.size(0) / batch_time.avg))

    if 'test' in args.anno:
        from itertools import groupby
        results.sort(key=lambda x: x['image_id'])
        f = open(str(args.model) + "-" + str(args.anno) + "-" + str(min(threshs)) + ".txt", "w+")
        line_count = 0
        for k, v in tqdm(groupby(results, key=lambda x: x['image_id'])):
            line_count += 1
            f.write(getimageNamefromid(k) + ",")
            for i in v:
                if i['category_id'] > 0:
                    if (i['category_id'] == 1 and i['score'] >= threshs[0]) or \
                       (i['category_id'] == 2 and i['score'] >= threshs[1]) or \
                       (i['category_id'] == 3 and i['score'] >= threshs[2]) or \
                       (i['category_id'] == 4 and i['score'] >= threshs[3]):
                        f.write(str(round(i['category_id'])) + " " +
                                str(round(i['bbox'][0])) + " " +
                                str(round(i['bbox'][1])) + " " +
                                str(round(float(i['bbox'][0]) + float(i['bbox'][2]))) + " " +
                                str(round(float(i['bbox'][1]) + float(i['bbox'][3]))) + " ")
            f.write('\n')
        print("generated lines:", line_count)
        f.close()

    if 'test' not in args.anno:
        array_of_dm = []
        array_of_gt = []
        for _, item in tqdm(dataset):
            for i in range(len(item['cls'])):
                array_of_gt.append(
                    BoundingBox(imageName=str(item["img_id"]),
                                classId=item["cls"][i],
                                x=item["bbox"][i][1] * item['img_scale'],
                                y=item["bbox"][i][0] * item['img_scale'],
                                w=item["bbox"][i][3] * item['img_scale'],
                                h=item["bbox"][i][2] * item['img_scale'],
                                typeCoordinates=CoordinatesType.Absolute,
                                bbType=BBType.GroundTruth,
                                format=BBFormat.XYX2Y2,
                                imgSize=(item['img_size'][0], item['img_size'][1])))

        for item in tqdm(results):
            if item["category_id"] >= 0:
                array_of_dm.append(
                    BoundingBox(imageName=str(item["image_id"]),
                                classId=item["category_id"],
                                classConfidence=item["score"],
                                x=item['bbox'][0],
                                y=item['bbox'][1],
                                w=item['bbox'][2],
                                h=item['bbox'][3],
                                typeCoordinates=CoordinatesType.Absolute,
                                bbType=BBType.Detected,
                                format=BBFormat.XYWH,
                                imgSize=(item['sizes'][0], item['sizes'][1])))

        # add all bounding boxes to the BoundingBoxes object
        myBoundingBoxes = BoundingBoxes()
        for box in array_of_gt:
            myBoundingBoxes.addBoundingBox(box)
        for dm in array_of_dm:
            myBoundingBoxes.addBoundingBox(dm)

        evaluator = Evaluator()
        f1res = []
        f1resd0 = []
        f1resd10 = []
        f1resd20 = []
        f1resd40 = []
        for conf in tqdm(range(210, 600, 1)):
            metricsPerClass = evaluator.GetPascalVOCMetrics(
                myBoundingBoxes, IOUThreshold=0.5, ConfThreshold=conf / 1000.0)
            totalTP = 0
            totalp = 0
            totalFP = 0
            tp = []
            fp = []
            ta = []
            for mc in metricsPerClass:
                tp.append(mc['total TP'])
                fp.append(mc['total FP'])
                ta.append(mc['total positives'])
                totalFP = totalFP + mc['total FP']
                totalTP = totalTP + mc['total TP']
                totalp = totalp + mc['total positives']

            if totalTP + totalFP == 0:
                p = -1
            else:
                p = totalTP / (totalTP + totalFP)
            if totalp == 0:
                r = -1
            else:
                r = totalTP / totalp

            f1res.append(dict(tp=totalTP, fp=totalFP, totalp=totalp,
                              conf=conf / 1000.0, prec=p, rec=r,
                              f1score=(2 * p * r) / (p + r)))
            # FIXME: the four per-class blocks below repeat the same arithmetic
            # (a collapsed helper is sketched after this function)
            f1resd0.append(
                dict(tp=tp[0], fp=fp[0], totalp=ta[0], conf=conf / 1000.0,
                     prec=tp[0] / (tp[0] + fp[0]),
                     rec=tp[0] / ta[0],
                     f1score=(2 * (tp[0] / (tp[0] + fp[0])) * (tp[0] / ta[0])) /
                             ((tp[0] / (tp[0] + fp[0])) + (tp[0] / ta[0]))))
            f1resd10.append(
                dict(tp=tp[1], fp=fp[1], totalp=ta[1], conf=conf / 1000.0,
                     prec=tp[1] / (tp[1] + fp[1]),
                     rec=tp[1] / ta[1],
                     f1score=(2 * (tp[1] / (tp[1] + fp[1])) * (tp[1] / ta[1])) /
                             ((tp[1] / (tp[1] + fp[1])) + (tp[1] / ta[1]))))
            f1resd20.append(
                dict(tp=tp[2], fp=fp[2], totalp=ta[2], conf=conf / 1000.0,
                     prec=tp[2] / (tp[2] + fp[2]),
                     rec=tp[2] / ta[2],
                     f1score=(2 * (tp[2] / (tp[2] + fp[2])) * (tp[2] / ta[2])) /
                             ((tp[2] / (tp[2] + fp[2])) + (tp[2] / ta[2]))))
            f1resd40.append(
                dict(tp=tp[3], fp=fp[3], totalp=ta[3], conf=conf / 1000.0,
                     prec=tp[3] / (tp[3] + fp[3]),
                     rec=tp[3] / ta[3],
                     f1score=(2 * (tp[3] / (tp[3] + fp[3])) * (tp[3] / ta[3])) /
                             ((tp[3] / (tp[3] + fp[3])) + (tp[3] / ta[3]))))

        sortedf1 = sorted(f1res, key=lambda k: k['f1score'], reverse=True)
        f1resd0 = sorted(f1resd0, key=lambda k: k['f1score'], reverse=True)
        f1resd10 = sorted(f1resd10, key=lambda k: k['f1score'], reverse=True)
        f1resd20 = sorted(f1resd20, key=lambda k: k['f1score'], reverse=True)
        f1resd40 = sorted(f1resd40, key=lambda k: k['f1score'], reverse=True)
        print(sortedf1[0])
        print("\n\n")
        print(f1resd0[0])
        print(f1resd10[0])
        print(f1resd20[0])
        print(f1resd40[0])

    json.dump(results, open(args.results, 'w'), indent=4)
    # coco_results = dataset.coco.loadRes(args.results)
    # coco_eval = COCOeval(dataset.coco, coco_results, 'bbox')
    # coco_eval.params.imgIds = img_ids  # score only ids we've used
    # coco_eval.evaluate()
    # coco_eval.accumulate()
    # coco_eval.summarize()
    return results
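# The four per-class blocks above repeat the same precision/recall/F1
# arithmetic with different indices. A hypothetical helper like this would
# collapse them into one call per class; it is a sketch only, and like the
# original code it assumes tp[idx] + fp[idx] and ta[idx] are non-zero.
def per_class_f1(tp, fp, ta, idx, conf):
    prec = tp[idx] / (tp[idx] + fp[idx])
    rec = tp[idx] / ta[idx]
    return dict(tp=tp[idx], fp=fp[idx], totalp=ta[idx], conf=conf,
                prec=prec, rec=rec,
                f1score=(2 * prec * rec) / (prec + rec))

# usage, replacing the four appends:
# f1resd0.append(per_class_f1(tp, fp, ta, 0, conf / 1000.0))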
def main():
    # set arguments
    parser = argparse.ArgumentParser(description='Test arguments')
    parser.add_argument('--opt', type=str, required=True, help='path to test yaml file')
    parser.add_argument('--name', type=str, required=True, help='test log file name')
    parser.add_argument('--dataset_name', type=str, default=None)
    parser.add_argument('--scale', type=int, required=True)
    parser.add_argument('--gpu_ids', type=str, default=None, help='which gpu to use')
    parser.add_argument('--which_model', type=str, required=True, help='which pretrained model')
    parser.add_argument('--pretrained', type=str, required=True, help='pretrained path')
    args = parser.parse_args()
    args, lg = test_parse(args)

    pn = 50
    half_pn = pn // 2
    lg.info('\n' + '-' * pn + 'General INFO' + '-' * pn)

    # create test dataloaders
    test_loader_list = []
    for i in range(len(args['dataset_list'])):
        # get a single dataset and its dataloader
        single_dataset_args = copy.deepcopy(args)
        single_dataset_args['datasets']['test']['dataroot_HR'] = single_dataset_args['datasets']['test']['dataroot_HR'][i]
        single_dataset_args['datasets']['test']['dataroot_LR'] = single_dataset_args['datasets']['test']['dataroot_LR'][i]
        test_dataset = create_dataset(single_dataset_args['datasets']['test'])
        test_loader = create_loader(test_dataset, args['datasets']['test'])
        test_loader_list.append(test_loader)

    # create model
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = create_model(args['networks']).to(device)
    lg.info('Create model: [{}]'.format(args['networks']['which_model']))
    scale = args['scale']

    # count parameters and FLOPs
    in_ = torch.randn(1, 3, round(720 / scale), round(1280 / scale)).to(device)
    params, GFlops = summary(model, in_)
    lg.info('Total parameters: [{:.3f}M], GFlops: [{:.4f}G]'.format(params / 1e6, GFlops / 1e9))

    # load pretrained model, stripping any 'module.' prefix left by DataParallel
    state_dict = torch.load(args['networks']['pretrained'])
    new_state_dict = {}
    for k, v in state_dict.items():
        if k[:7] == 'module.':
            new_state_dict[k[7:]] = v
        else:
            new_state_dict[k] = v
    model.load_state_dict(new_state_dict, strict=True)
    lg.info('Load pretrained from: [{}]'.format(args['networks']['pretrained']))

    for i, test_loader in enumerate(test_loader_list):
        dataset_name = args['dataset_list'][i]
        l = (12 - len(dataset_name)) // 2
        e = len(dataset_name) % 2
        lg.info('\n\n' + '-' * (pn + l) + dataset_name + '-' * (pn + l + e))
        lg.info('Number of [{}] images: [{}]'.format(dataset_name, len(test_loader)))

        # measure cuda time
        avg_test_time = 0.0
        if args['calc_cuda_time'] and 'Set5' in dataset_name:
            lg.info('Start calculating cuda time...')
            avg_test_time = calc_cuda_time(test_loader, model)
            lg.info('Average cuda time on [{}]: [{:.5f}ms]'.format(dataset_name, avg_test_time))

        # calculate PSNR and SSIM
        psnr_list = []
        ssim_list = []
        model.eval()
        for iter, data in enumerate(test_loader):
            lr = data['LR'].to(device)
            hr = data['HR']

            # calculate evaluation metrics
            with torch.no_grad():
                sr = model(lr)
            # save(args['networks']['which_model'], dataset_name, data['filename'][0], tensor2np(sr))
            psnr, ssim = calc_metrics(tensor2np(sr), tensor2np(hr), crop_border=scale, test_Y=True)
            psnr_list.append(psnr)
            ssim_list.append(ssim)
            lg.info('[{:03d}/{:03d}] || PSNR/SSIM: {:.2f}/{:.4f} || {}'.format(
                iter + 1, len(test_loader), psnr, ssim, data['filename']))

        avg_psnr = sum(psnr_list) / len(psnr_list)
        avg_ssim = sum(ssim_list) / len(ssim_list)
        if avg_test_time > 0:
            lg.info('Average PSNR: {:.2f}  Average SSIM: {:.4f}, Average time: {:.5f}ms'.format(
                avg_psnr, avg_ssim, avg_test_time))
        else:
            lg.info('Average PSNR: {:.2f}  Average SSIM: {:.4f}'.format(avg_psnr, avg_ssim))

    lg.info('\n' + '-' * pn + '---Finish---' + '-' * pn)
def main():
    args = parser.parse_args()

    if not args.checkpoint and not args.pretrained:
        args.pretrained = True

    amp_autocast = suppress  # do nothing
    if args.amp:
        if not has_native_amp:
            print("Native Torch AMP is not available (requires torch >= 1.6), using FP32.")
        else:
            amp_autocast = torch.cuda.amp.autocast

    # create model
    model = geffnet.create_model(args.model,
                                 num_classes=args.num_classes,
                                 in_chans=3,
                                 pretrained=args.pretrained,
                                 checkpoint_path=args.checkpoint,
                                 scriptable=args.torchscript)

    if args.channels_last:
        model = model.to(memory_format=torch.channels_last)

    if args.torchscript:
        torch.jit.optimized_execution(True)
        model = torch.jit.script(model)

    print('Model %s created, param count: %d' %
          (args.model, sum([m.numel() for m in model.parameters()])))

    data_config = resolve_data_config(model, args)
    criterion = nn.CrossEntropyLoss()

    if not args.no_cuda:
        if args.num_gpu > 1:
            model = torch.nn.DataParallel(model, device_ids=list(range(args.num_gpu))).cuda()
        else:
            model = model.cuda()
        criterion = criterion.cuda()

    loader = create_loader(Dataset(args.data, load_bytes=args.tf_preprocessing),
                           input_size=data_config['input_size'],
                           batch_size=args.batch_size,
                           use_prefetcher=not args.no_cuda,
                           interpolation=data_config['interpolation'],
                           mean=data_config['mean'],
                           std=data_config['std'],
                           num_workers=args.workers,
                           crop_pct=data_config['crop_pct'],
                           tensorflow_preprocessing=args.tf_preprocessing)

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.eval()
    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(loader):
            if not args.no_cuda:
                target = target.cuda()
                input = input.cuda()
            if args.channels_last:
                input = input.contiguous(memory_format=torch.channels_last)

            # compute output
            with amp_autocast():
                output = model(input)
                loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f}, {rate_avg:.3f}/s)\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(loader), batch_time=batch_time,
                          rate_avg=input.size(0) / batch_time.avg,
                          loss=losses, top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} ({top1a:.3f}) Prec@5 {top5.avg:.3f} ({top5a:.3f})'.format(
        top1=top1, top1a=100 - top1.avg, top5=top5, top5a=100. - top5.avg))
def main():
    args = parser.parse_args()
    args.prefetcher = not args.no_prefetcher
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
        if args.distributed and args.num_gpu > 1:
            print('Using more than one GPU per process in distributed mode is not allowed. Setting num_gpu to 1.')
            args.num_gpu = 1

    args.device = 'cuda:0'
    args.world_size = 1
    r = -1
    if args.distributed:
        args.num_gpu = 1
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        r = torch.distributed.get_rank()

    if args.distributed:
        print('Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
              % (r, args.world_size))
    else:
        print('Training with a single process on %d GPUs.' % args.num_gpu)

    # FIXME seed handling for multi-process distributed?
    torch.manual_seed(args.seed)

    output_dir = ''
    if args.local_rank == 0:
        if args.output:
            output_base = args.output
        else:
            output_base = './output'
        exp_name = '-'.join([
            datetime.now().strftime("%Y%m%d-%H%M%S"),
            args.model,
            str(args.img_size)])
        output_dir = get_outdir(output_base, 'train', exp_name)

    model = create_model(args.model,
                         pretrained=args.pretrained,
                         num_classes=args.num_classes,
                         drop_rate=args.drop,
                         global_pool=args.gp,
                         bn_tf=args.bn_tf,
                         bn_momentum=args.bn_momentum,
                         bn_eps=args.bn_eps,
                         checkpoint_path=args.initial_checkpoint)

    print('Model %s created, param count: %d' %
          (args.model, sum([m.numel() for m in model.parameters()])))

    data_config = resolve_data_config(model, args, verbose=args.local_rank == 0)

    # optionally resume from a checkpoint
    start_epoch = 0
    optimizer_state = None
    if args.resume:
        optimizer_state, start_epoch = resume_checkpoint(model, args.resume, args.start_epoch)

    if args.num_gpu > 1:
        if args.amp:
            print('Warning: AMP does not work well with nn.DataParallel, disabling. '
                  'Use distributed mode for multi-GPU AMP.')
            args.amp = False
        model = nn.DataParallel(model, device_ids=list(range(args.num_gpu))).cuda()
    else:
        model.cuda()

    optimizer = create_optimizer(args, model)
    if optimizer_state is not None:
        optimizer.load_state_dict(optimizer_state)

    if has_apex and args.amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        use_amp = True
        print('AMP enabled')
    else:
        use_amp = False
        print('AMP disabled')

    if args.distributed:
        model = DDP(model, delay_allreduce=True)

    lr_scheduler, num_epochs = create_scheduler(args, optimizer)
    if start_epoch > 0:
        lr_scheduler.step(start_epoch)
    if args.local_rank == 0:
        print('Scheduled epochs: ', num_epochs)

    train_dir = os.path.join(args.data, 'train')
    if not os.path.exists(train_dir):
        print('Error: training folder does not exist at: %s' % train_dir)
        exit(1)
    dataset_train = Dataset(train_dir)

    collate_fn = None
    if args.prefetcher and args.mixup > 0:
        collate_fn = FastCollateMixup(args.mixup, args.smoothing, args.num_classes)

    loader_train = create_loader(
        dataset_train,
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        rand_erase_prob=args.reprob,
        rand_erase_mode=args.remode,
        interpolation='random',  # FIXME cleanly resolve this? data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        collate_fn=collate_fn,
    )

    eval_dir = os.path.join(args.data, 'validation')
    if not os.path.isdir(eval_dir):
        print('Error: validation folder does not exist at: %s' % eval_dir)
        exit(1)
    dataset_eval = Dataset(eval_dir)

    loader_eval = create_loader(
        dataset_eval,
        input_size=data_config['input_size'],
        batch_size=4 * args.batch_size,
        is_training=False,
        use_prefetcher=args.prefetcher,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
    )

    if args.mixup > 0.:
        # smoothing is handled with mixup label transform
        train_loss_fn = SoftTargetCrossEntropy().cuda()
        validate_loss_fn = nn.CrossEntropyLoss().cuda()
    elif args.smoothing:
        train_loss_fn = LabelSmoothingCrossEntropy(smoothing=args.smoothing).cuda()
        validate_loss_fn = nn.CrossEntropyLoss().cuda()
    else:
        train_loss_fn = nn.CrossEntropyLoss().cuda()
        validate_loss_fn = train_loss_fn

    eval_metric = args.eval_metric
    saver = None
    if output_dir:
        decreasing = True if eval_metric == 'loss' else False
        saver = CheckpointSaver(checkpoint_dir=output_dir, decreasing=decreasing)

    best_metric = None
    best_epoch = None
    try:
        for epoch in range(start_epoch, num_epochs):
            if args.distributed:
                loader_train.sampler.set_epoch(epoch)

            train_metrics = train_epoch(epoch, model, loader_train, optimizer,
                                        train_loss_fn, args,
                                        lr_scheduler=lr_scheduler, saver=saver,
                                        output_dir=output_dir, use_amp=use_amp)

            eval_metrics = validate(model, loader_eval, validate_loss_fn, args)

            if lr_scheduler is not None:
                lr_scheduler.step(epoch, eval_metrics[eval_metric])

            update_summary(epoch, train_metrics, eval_metrics,
                           os.path.join(output_dir, 'summary.csv'),
                           write_header=best_metric is None)

            if saver is not None:
                # save proper checkpoint with eval metric
                best_metric, best_epoch = saver.save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.model,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'args': args,
                    },
                    epoch=epoch + 1,
                    metric=eval_metrics[eval_metric])
    except KeyboardInterrupt:
        pass
    if best_metric is not None:
        print('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch))
def main():
    setup_default_logging()
    args, args_text = _parse_args()

    args.pretrained_backbone = not args.no_pretrained_backbone
    args.prefetcher = not args.no_prefetcher
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    if args.distributed:
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()
    assert args.rank >= 0

    if args.distributed:
        logging.info('Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
                     % (args.rank, args.world_size))
    else:
        logging.info('Training with a single process on 1 GPU.')

    torch.manual_seed(args.seed + args.rank)

    model = create_model(
        args.model,
        bench_task='train',
        pretrained=args.pretrained,
        pretrained_backbone=args.pretrained_backbone,
        redundant_bias=args.redundant_bias,
        checkpoint_path=args.initial_checkpoint,
    )
    # FIXME decide which args to keep and overlay on config / pass to backbone
    #   num_classes=args.num_classes,
    #   drop_rate=args.drop,
    #   drop_path_rate=args.drop_path,
    #   drop_block_rate=args.drop_block,
    input_size = model.config.image_size

    if args.local_rank == 0:
        logging.info('Model %s created, param count: %d' %
                     (args.model, sum([m.numel() for m in model.parameters()])))

    model.cuda()
    optimizer = create_optimizer(args, model)

    use_amp = False
    if has_apex and args.amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        use_amp = True
    if args.local_rank == 0:
        logging.info('NVIDIA APEX {}. AMP {}.'.format(
            'installed' if has_apex else 'not installed',
            'on' if use_amp else 'off'))

    # optionally resume from a checkpoint
    resume_state = {}
    resume_epoch = None
    if args.resume:
        resume_state, resume_epoch = resume_checkpoint(unwrap_bench(model), args.resume)
    if resume_state and not args.no_resume_opt:
        if 'optimizer' in resume_state:
            if args.local_rank == 0:
                logging.info('Restoring Optimizer state from checkpoint')
            optimizer.load_state_dict(resume_state['optimizer'])
        if use_amp and 'amp' in resume_state and 'load_state_dict' in amp.__dict__:
            if args.local_rank == 0:
                logging.info('Restoring NVIDIA AMP state from checkpoint')
            amp.load_state_dict(resume_state['amp'])
    del resume_state

    model_ema = None
    if args.model_ema:
        # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
        model_ema = ModelEma(model, decay=args.model_ema_decay)  # resume=args.resume)  # FIXME bit of a mess with bench
        if args.resume:
            load_checkpoint(unwrap_bench(model_ema), args.resume, use_ema=True)

    if args.distributed:
        if args.sync_bn:
            try:
                if has_apex:
                    model = convert_syncbn_model(model)
                else:
                    model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
                if args.local_rank == 0:
                    logging.info('Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using '
                                 'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.')
            except Exception as e:
                logging.error('Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1')
        if has_apex:
            model = DDP(model, delay_allreduce=True)
        else:
            if args.local_rank == 0:
                logging.info("Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP.")
            model = DDP(model, device_ids=[args.local_rank])  # can use device str in Torch >= 1.1
        # NOTE: EMA model does not need to be wrapped by DDP

    lr_scheduler, num_epochs = create_scheduler(args, optimizer)
    start_epoch = 0
    if args.start_epoch is not None:
        # a specified start_epoch will always override the resume epoch
        start_epoch = args.start_epoch
    elif resume_epoch is not None:
        start_epoch = resume_epoch
    if lr_scheduler is not None and start_epoch > 0:
        lr_scheduler.step(start_epoch)

    if args.local_rank == 0:
        logging.info('Scheduled epochs: {}'.format(num_epochs))

    # train_anno_set = 'train2017'
    # train_annotation_path = os.path.join(args.data, 'annotations', f'instances_{train_anno_set}.json')
    # train_image_dir = train_anno_set
    # dataset_train = CocoDetection("/workspace/data/images",
    #                               "/workspace/data/datatrain90n.json")
    train_anno_set = 'train'
    train_annotation_path = os.path.join(args.data, 'annotations', f'instances_{train_anno_set}.json')
    train_image_dir = train_anno_set
    dataset_train = CocoDetection(os.path.join(args.data, train_image_dir), train_annotation_path)

    # FIXME cutmix/mixup worth investigating?
    # collate_fn = None
    # if args.prefetcher and args.mixup > 0:
    #     collate_fn = FastCollateMixup(args.mixup, args.smoothing, args.num_classes)

    loader_train = create_loader(
        dataset_train,
        input_size=input_size,
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        # re_prob=args.reprob,  # FIXME add back various augmentations
        # re_mode=args.remode,
        # re_count=args.recount,
        # re_split=args.resplit,
        # color_jitter=args.color_jitter,
        # auto_augment=args.aa,
        interpolation=args.train_interpolation,
        mean=[0.4533, 0.4744, 0.4722],  # [0.4846, 0.5079, 0.5005], [0.485, 0.456, 0.406]
        std=[0.2823, 0.2890, 0.3084],   # [0.2687, 0.2705, 0.2869], [0.485, 0.456, 0.406]
        num_workers=args.workers,
        distributed=args.distributed,
        # collate_fn=collate_fn,
        pin_mem=args.pin_mem,
    )

    train_anno_set = 'val'
    train_annotation_path = os.path.join(args.data, 'annotations', f'instances_{train_anno_set}.json')
    train_image_dir = train_anno_set
    dataset_eval = CocoDetection(os.path.join(args.data, train_image_dir), train_annotation_path)
    # train_anno_set = 'val'
    # train_annotation_path = os.path.join(args.data, 'annotations', f'instances_{train_anno_set}.json')
    # train_image_dir = train_anno_set
    # dataset_eval = CocoDetection("/workspace/data/val/images",
    #                              "/workspace/data/dataval90n.json")

    loader_eval = create_loader(
        dataset_eval,
        input_size=input_size,
        batch_size=args.validation_batch_size_multiplier * args.batch_size,
        is_training=False,
        use_prefetcher=args.prefetcher,
        interpolation=args.interpolation,
        mean=[0.4535, 0.4744, 0.4724],  # [0.4851, 0.5083, 0.5009]
        std=[0.2835, 0.2903, 0.3098],   # [0.2690, 0.2709, 0.2877]
        num_workers=args.workers,
        # distributed=args.distributed,
        pin_mem=args.pin_mem,
    )

    # for xx, item in dataset_train:
    #     print("out", type(xx))
    #     break
    # exit()
    array_of_gt = []
    if args.local_rank == 0:
        for _, item in tqdm(dataset_eval):
            for i in range(len(item['cls'])):
                array_of_gt.append(
                    BoundingBox(imageName=str(item["img_id"]),
                                classId=item["cls"][i],
                                x=item["bbox"][i][1] * item['img_scale'],
                                y=item["bbox"][i][0] * item['img_scale'],
                                w=item["bbox"][i][3] * item['img_scale'],
                                h=item["bbox"][i][2] * item['img_scale'],
                                typeCoordinates=CoordinatesType.Absolute,
                                bbType=BBType.GroundTruth,
                                format=BBFormat.XYX2Y2,
                                imgSize=(item['img_size'][0], item['img_size'][1])))

    evaluator = COCOEvaluator(dataset_eval.coco, distributed=args.distributed, gtboxes=array_of_gt)

    eval_metric = args.eval_metric
    best_metric = None
    best_epoch = None
    saver = None
    output_dir = ''
    if args.local_rank == 0:
        output_base = args.output if args.output else './output'
        exp_name = '-'.join([datetime.now().strftime("%Y%m%d-%H%M%S"), args.model])
        output_dir = get_outdir(output_base, 'train', exp_name)
        decreasing = True if eval_metric == 'loss' else False
        saver = CheckpointSaver(checkpoint_dir=output_dir, decreasing=decreasing)
        with open(os.path.join(output_dir, 'args.yaml'), 'w') as f:
            f.write(args_text)

    # print(model)
    try:
        for epoch in range(start_epoch, num_epochs):
            if args.distributed:
                loader_train.sampler.set_epoch(epoch)

            train_metrics = train_epoch(epoch, model, loader_train, optimizer, args,
                                        lr_scheduler=lr_scheduler, saver=saver,
                                        output_dir=output_dir, use_amp=use_amp,
                                        model_ema=model_ema)

            if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                if args.local_rank == 0:
                    logging.info("Distributing BatchNorm running means and vars")
                distribute_bn(model, args.world_size, args.dist_bn == 'reduce')

            # the overhead of evaluating with coco style datasets is fairly high, so just ema or non, not both
            if model_ema is not None:
                if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                    distribute_bn(model_ema, args.world_size, args.dist_bn == 'reduce')
                eval_metrics = validate(model_ema.ema, loader_eval, args, evaluator, log_suffix=' (EMA)')
            else:
                eval_metrics = validate(model, loader_eval, args, evaluator)

            if lr_scheduler is not None:
                # step LR for next epoch
                lr_scheduler.step(epoch + 1, eval_metrics[eval_metric])

            if saver is not None:
                update_summary(epoch, train_metrics, eval_metrics,
                               os.path.join(output_dir, 'summary.csv'),
                               write_header=best_metric is None)

                # save proper checkpoint with eval metric
                save_metric = eval_metrics[eval_metric]
                best_metric, best_epoch = saver.save_checkpoint(
                    unwrap_bench(model), optimizer, args,
                    epoch=epoch,
                    model_ema=unwrap_bench(model_ema),
                    metric=save_metric,
                    use_amp=use_amp)
    except KeyboardInterrupt:
        pass
    if best_metric is not None:
        logging.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch))
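# unwrap_bench is referenced when resuming and saving above but not defined in
# this section. A plausible sketch is below, assuming the usual wrapper chain
# of ModelEma / DDP / detection bench around the raw model; the attribute
# names checked here are assumptions, not a confirmed API.
def unwrap_bench(model):
    if model is None:
        return None
    if hasattr(model, 'ema'):      # ModelEma wrapper
        return unwrap_bench(model.ema)
    if hasattr(model, 'module'):   # DDP / DataParallel wrapper
        return unwrap_bench(model.module)
    if hasattr(model, 'model'):    # bench wrapper around the raw model
        return unwrap_bench(model.model)
    return model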
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed_args = %s", unparsed)

    num_gpus = torch.cuda.device_count()
    genotype = eval("genotypes.%s" % args.arch)
    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')

    model = Network(args.init_channels, CLASSES, args.layers, args.auxiliary,
                    genotype, args.use_dropout)
    if num_gpus > 1:
        model = nn.DataParallel(model)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    if args.load_path is not None:
        pretrained_dict = torch.load(os.path.expanduser(args.load_path),
                                     map_location=lambda storage, loc: storage)
        model_dict = model.state_dict()  # get the model's parameter dict
        # values for keys shared by pretrained_dict and model_dict must have the same shape
        model_dict.update(pretrained_dict['state_dict'])
        model.load_state_dict(model_dict)  # update the model weights

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    criterion_smooth = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
    criterion_smooth = criterion_smooth.cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    dataset_train = ImageList(root=args.root_path,
                              fileList=args.root_path + '/' + args.trainFile)

    collate_fn = None
    if args.prefetcher and args.mixup > 0:
        collate_fn = FastCollateMixup(args.mixup, args.smoothing, args.num_classes)

    num_train = len(dataset_train)
    train_queue = create_loader(
        dataset_train,  # the transform is applied implicitly here
        input_size=144,
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        rand_erase_prob=args.reprob,
        rand_erase_mode=args.remode,
        color_jitter=args.color_jitter,
        interpolation='random',  # FIXME cleanly resolve this? data_config['interpolation'],
        # mean=data_config['mean'],
        # std=data_config['std'],
        num_workers=args.workers,
        collate_fn=collate_fn,
        auto_augment=args.aa,
        use_aug=True)

    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.decay_period, gamma=args.gamma)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

    best_LFWACC = 0
    lr = args.learning_rate
    writer = SummaryWriter(os.path.join(args.save, 'tensorboard'), comment='pcdarts-for-LFW')

    for epoch in range(args.epochs):
        if args.lr_scheduler == 'cosine':
            scheduler.step()
            current_lr = scheduler.get_lr()[0]
        elif args.lr_scheduler == 'linear':
            current_lr = adjust_lr(optimizer, epoch)
        else:
            print('Wrong lr type, exit')
            sys.exit(1)
        logging.info('Epoch: %d lr %e', epoch, current_lr)

        if epoch < 5 and args.load_path is None:  # and args.batch_size > 256:
            # warm up the learning rate over the first 5 epochs
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr * (epoch + 1) / 5.0
            current_lr = lr * (epoch + 1) / 5.0
            logging.info('Warming-up Epoch: %d, LR: %e', epoch, current_lr)

        if num_gpus > 1:
            model.module.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        else:
            model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        epoch_start = time.time()
        train_acc, train_obj = train(train_queue, model, criterion_smooth, optimizer)
        logging.info('Train_acc: %f', train_acc)
        writer.add_scalar('lr', current_lr, epoch)
        writer.add_scalar('train_acc', train_acc, epoch)
        writer.add_scalar('train_loss', train_obj, epoch)

        if (epoch >= 50 and (epoch + 1) % 2 == 0) or args.load_path is not None:
            # valid_acc, valid_obj = infer(valid_queue, model, criterion)
            # logging.info('valid_acc %f', valid_acc)
            LFWACC, std, thd = lfw_eval(args, model)
            logging.info('lfw_eval LFW_ACC:%f LFW_std:%f LFW_thd:%f', LFWACC, std, thd)
            writer.add_scalar('LFW_ACC', LFWACC, epoch)
            writer.add_scalar('LFW_std', std, epoch)
            writer.add_scalar('LFW_thd', thd, epoch)

            is_best = False
            if LFWACC >= best_LFWACC:
                best_LFWACC = LFWACC
                is_best = True
                logging.info('lfw_eval BEST_LFW_ACC:%f', best_LFWACC)

            utils.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_LFWACC': best_LFWACC,
                    'optimizer': optimizer.state_dict(),
                }, is_best, args.save)

    writer.close()
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, args.dropout_type)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # train_transform, valid_transform = utils._data_transforms_cifar10(args)
    # if args.set == 'cifar100':
    #     train_data = dset.CIFAR100(root=args.data, train=True, download=False, transform=train_transform)
    # else:
    #     train_data = dset.CIFAR10(root=args.data, train=True, download=False, transform=train_transform)
    dataset_train = ImageList(root=args.data, fileList=args.data + '/' + args.trainFile)

    collate_fn = None
    if args.prefetcher and args.mixup > 0:
        collate_fn = FastCollateMixup(args.mixup, args.smoothing, args.num_classes)

    num_train = len(dataset_train)
    # indices = list(range(num_train))  # randint(args.data_portion * num_train)
    # np.linspace needs an integer sample count; np.int is removed in newer numpy
    indices = np.linspace(0, num_train - 1, int(args.data_portion * num_train), dtype=int)
    random.shuffle(indices)
    num_train = len(indices)
    # patch = int(np.floor(args.data_portion * num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = create_loader(
        dataset_train,  # the transform is applied implicitly here
        input_size=144,
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        rand_erase_prob=args.reprob,
        rand_erase_mode=args.remode,
        color_jitter=args.color_jitter,
        interpolation='random',  # FIXME cleanly resolve this? data_config['interpolation'],
        # mean=data_config['mean'],
        # std=data_config['std'],
        num_workers=args.workers,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        collate_fn=collate_fn,
        auto_augment=args.aa)

    valid_queue = create_loader(
        dataset_train,  # the transform is applied implicitly here
        input_size=144,
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        rand_erase_prob=args.reprob,
        rand_erase_mode=args.remode,
        color_jitter=args.color_jitter,
        interpolation='random',  # FIXME cleanly resolve this? data_config['interpolation'],
        # mean=data_config['mean'],
        # std=data_config['std'],
        num_workers=args.workers,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        collate_fn=collate_fn,
        auto_augment=args.aa)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)
    writer = SummaryWriter(os.path.join(args.save, 'tensorboard'))

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                     criterion, optimizer, lr, epoch,
                                     args.arch_epoch, writer)
        logging.info('train_acc %f', train_acc)
        writer.add_scalar('lr', lr, epoch)
        writer.add_scalar('train_acc', train_acc, epoch)
        writer.add_scalar('train_loss', train_obj, epoch)

        # validation
        # if (epoch + 1) >= args.valid_epoch and (epoch + 1) % 5 == 0:
        #     LFWACC, std, thd = lfw_eval(args, model)
        #     logging.info('lfw_eval LFW_ACC:%f LFW_std:%f LFW_thd:%f', LFWACC, std, thd)
        #     writer.add_scalar('LFW_ACC', LFWACC, epoch)
        #     writer.add_scalar('LFW_std', std, epoch)
        #     writer.add_scalar('LFW_thd', thd, epoch)
        # utils.save(model, os.path.join(args.save, 'weights.pt'))

    writer.close()
def main():
    args = parser.parse_args()
    args.gpu_id = 0

    model = model_helper.ModelHelper(name="validation_net", init_params=False)

    # bring in the init net from init_net.pb
    init_net_proto = caffe2_pb2.NetDef()
    with open(args.c2_init, "rb") as f:
        init_net_proto.ParseFromString(f.read())
    model.param_init_net = core.Net(init_net_proto)  # model.param_init_net.AppendNet(core.Net(init_net_proto))

    # bring in the predict net from predict_net.pb
    predict_net_proto = caffe2_pb2.NetDef()
    with open(args.c2_predict, "rb") as f:
        predict_net_proto.ParseFromString(f.read())
    model.net = core.Net(predict_net_proto)  # model.net.AppendNet(core.Net(predict_net_proto))

    data_config = resolve_data_config(args.model, args)
    loader = create_loader(
        Dataset(args.data, load_bytes=args.tf_preprocessing),
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        use_prefetcher=False,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        crop_pct=data_config['crop_pct'],
        tensorflow_preprocessing=args.tf_preprocessing)

    # this is so obvious, wonderful interface </sarcasm>
    input_blob = model.net.external_inputs[0]
    output_blob = model.net.external_outputs[0]

    if True:
        device_opts = None
    else:
        # CUDA is crashing, no idea why, awesome error message, give it a try for kicks
        device_opts = core.DeviceOption(caffe2_pb2.PROTO_CUDA, args.gpu_id)
        model.net.RunAllOnGPU(gpu_id=args.gpu_id, use_cudnn=True)
        model.param_init_net.RunAllOnGPU(gpu_id=args.gpu_id, use_cudnn=True)

    model.param_init_net.GaussianFill([],
                                      input_blob.GetUnscopedName(),
                                      shape=(1,) + data_config['input_size'],
                                      mean=0.0,
                                      std=1.0)
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net, overwrite=True)

    batch_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    for i, (input, target) in enumerate(loader):
        # run the net and return prediction
        caffe2_in = input.data.numpy()
        workspace.FeedBlob(input_blob, caffe2_in, device_opts)
        workspace.RunNet(model.net, num_iter=1)
        output = workspace.FetchBlob(output_blob)

        # measure accuracy and record loss
        prec1, prec5 = accuracy_np(output.data, target.numpy())
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f}, {rate_avg:.3f}/s, {ms_avg:.3f} ms/sample)\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      i, len(loader), batch_time=batch_time,
                      rate_avg=input.size(0) / batch_time.avg,
                      ms_avg=1000 * batch_time.avg / input.size(0),  # milliseconds per sample
                      top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} ({top1a:.3f}) Prec@5 {top5.avg:.3f} ({top5a:.3f})'.format(
        top1=top1, top1a=100 - top1.avg, top5=top5, top5a=100. - top5.avg))
import time

from train_arguments import Arguments
from data import create_loader
from model import create_model
from utils.general import Display

args = Arguments().parse()

data_loader = create_loader(args)
dataset = data_loader.load_data()
dataset_size = len(data_loader)
nl = '\n'
print(f'There are a total number of {dataset_size} sequences of {args.num_frames} frames in the training set.{nl}')

model = create_model(args)
model.set_up(args)
display = Display(args)

global_step = 0
total_steps = 0
print(f'Training has begun!{nl}')

for epoch in range(0, args.num_epochs):
    data_time_start = time.time()
    for j, data in enumerate(data_loader):
        processing_time_start = time.time()
        if global_step % args.print_freq == 0:
def main():
    args = parser.parse_args()

    # create model
    model = create_model(args.model,
                         num_classes=args.num_classes,
                         in_chans=3,
                         pretrained=args.pretrained,
                         checkpoint_path=args.checkpoint)

    print('Model %s created, param count: %d' %
          (args.model, sum([m.numel() for m in model.parameters()])))

    data_config = resolve_data_config(model, args)
    model, test_time_pool = apply_test_time_pool(model, data_config, args)

    if args.num_gpu > 1:
        model = torch.nn.DataParallel(model, device_ids=list(range(args.num_gpu))).cuda()
    else:
        model = model.cuda()

    criterion = nn.CrossEntropyLoss().cuda()

    loader = create_loader(
        Dataset(args.data),
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        use_prefetcher=True,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        crop_pct=1.0 if test_time_pool else data_config['crop_pct'])

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.eval()
    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(loader):
            target = target.cuda()
            input = input.cuda()

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f}, {rate_avg:.3f}/s)\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(loader), batch_time=batch_time,
                          rate_avg=input.size(0) / batch_time.avg,
                          loss=losses, top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} ({top1a:.3f}) Prec@5 {top5.avg:.3f} ({top5a:.3f})'.format(
        top1=top1, top1a=100 - top1.avg, top5=top5, top5a=100. - top5.avg))
        pred.detach().cpu().numpy())
    print("log: eval()")
    print("loss: ", loss.item(), "ap: ", ap, " map: ", map, " f1: ", f1)


if __name__ == '__main__':
    args = parser.parse_args()
    # dataset
    print("log: evaluation.py __main__ enter")
    print("Loading data...")
    if args.stgcn:
        print("Evaluating stgcn")
        val_loader = create_loader(args)
        stgcn_model = load_stgcn(STGCN_WEIGHTS_PATH)
        eval(stgcn_model, val_loader)
    elif args.dgnn:
        print("Evaluating dgnn")
        val_loader = create_loader(args)
        dgnn_model = load_dgnn(DGNN_WEIGHTS_PATH)
        eval(dgnn_model, val_loader, use_bones=True)
    elif args.lstm:
        print("Evaluating lstm")
        subprocess.call(["python3", "lstm/main.py"])
    elif args.step:
        print("Evaluating step")
def main():
    # file and stream logger
    log_path = 'log/logger_info.log'
    lg = logger('Base', log_path)
    pn = 40
    print('\n', '-' * pn, 'General INFO', '-' * pn)

    # set arguments
    parser = argparse.ArgumentParser(description='Test arguments')
    parser.add_argument('--opt', type=str, required=True, help='path to test yaml file')
    parser.add_argument('--dataset_name', type=str, default=None)
    parser.add_argument('--scale', type=int, required=True)
    parser.add_argument('--which_model', type=str, required=True, help='which pretrained model')
    parser.add_argument('--pretrained', type=str, required=True, help='pretrained path')
    args = parser.parse_args()
    args = test_parse(args, lg)

    # create test dataloader
    test_dataset = create_dataset(args['datasets']['test'])
    test_loader = create_loader(test_dataset, args['datasets']['test'])
    lg.info('\nHR root: [{}]\nLR root: [{}]'.format(args['datasets']['test']['dataroot_HR'],
                                                    args['datasets']['test']['dataroot_LR']))
    lg.info('Number of test images: [{}]'.format(len(test_dataset)))

    # create model
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = create_model(args['networks']).to(device)
    lg.info('Create model: [{}]'.format(args['networks']['which_model']))
    scale = args['scale']

    state_dict = torch.load(args['networks']['pretrained'])
    lg.info('Load pretrained from: [{}]'.format(args['networks']['pretrained']))
    model.load_state_dict(state_dict)

    # calculate cuda time
    if args['calc_cuda_time']:
        lg.info('Start calculating cuda time...')
        avg_test_time = calc_cuda_time(test_loader, model)
        lg.info('Average cuda time: [{:.5f}]'.format(avg_test_time))

    # test
    print('\n', '-' * pn, 'Testing {}'.format(args['dataset_name']), '-' * pn)
    # pbar = ProgressBar(len(test_loader))
    psnr_list = []
    ssim_list = []
    time_list = []
    for iter, data in enumerate(test_loader):
        lr = data['LR'].to(device)
        hr = data['HR']

        # calculate evaluation metrics
        sr = model(lr)
        psnr, ssim = calc_metrics(tensor2np(sr), tensor2np(hr), crop_border=scale, test_Y=True)
        psnr_list.append(psnr)
        ssim_list.append(ssim)
        # pbar.update('')
        print('[{:03d}/{:03d}] || PSNR/SSIM: {:.2f}/{:.4f} || {}'.format(
            iter + 1, len(test_loader), psnr, ssim, data['filename']))

    avg_psnr = sum(psnr_list) / len(psnr_list)
    avg_ssim = sum(ssim_list) / len(ssim_list)

    print('\n', '-' * pn, 'Summary', '-' * pn)
    print('Average PSNR: {:.2f}  Average SSIM: {:.4f}'.format(avg_psnr, avg_ssim))
    print('\n', '-' * pn, 'Finish', '-' * pn)
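# tensor2np and calc_metrics are imported from elsewhere in this repo; a
# minimal sketch of tensor2np consistent with how it is used here (a
# [1, C, H, W] float tensor in [0, 1] becomes an HWC uint8 array) might look
# like this. An assumption for illustration, not the repo's implementation.
import numpy as np
import torch

def tensor2np(t: torch.Tensor) -> np.ndarray:
    img = t.detach().squeeze(0).clamp(0, 1).cpu().numpy()          # [C, H, W]
    return (img.transpose(1, 2, 0) * 255.0).round().astype(np.uint8)  # [H, W, C]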
def validate(args):
    # might as well try to validate something
    args.pretrained = args.pretrained or not args.checkpoint
    args.prefetcher = not args.no_prefetcher

    # create model
    config = get_efficientdet_config(args.model)
    model = EfficientDet(config)
    if args.checkpoint:
        load_checkpoint(model, args.checkpoint)

    param_count = sum([m.numel() for m in model.parameters()])
    logging.info('Model %s created, param count: %d' % (args.model, param_count))

    bench = DetBenchEval(model, config)
    bench.model = bench.model.cuda()
    if has_amp:
        bench.model = amp.initialize(bench.model, opt_level='O1')

    if args.num_gpu > 1:
        bench.model = torch.nn.DataParallel(bench.model, device_ids=list(range(args.num_gpu)))

    if 'test' in args.anno:
        annotation_path = os.path.join(args.data, 'annotations', f'image_info_{args.anno}.json')
        image_dir = 'test2017'
    else:
        annotation_path = os.path.join(args.data, 'annotations', f'instances_{args.anno}.json')
        image_dir = args.anno
    dataset = CocoDetection(os.path.join(args.data, image_dir), annotation_path)

    loader = create_loader(dataset,
                           input_size=config.image_size,
                           batch_size=args.batch_size,
                           use_prefetcher=args.prefetcher,
                           interpolation=args.interpolation,
                           num_workers=args.workers)

    img_ids = []
    results = []
    model.eval()
    batch_time = AverageMeter()
    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(loader):
            output = bench(input, target['img_id'], target['scale'])
            for batch_out in output:
                for det in batch_out:
                    image_id = int(det[0])
                    score = float(det[5])
                    coco_det = {
                        'image_id': image_id,
                        'bbox': det[1:5].tolist(),
                        'score': score,
                        'category_id': int(det[6]),
                    }
                    img_ids.append(image_id)
                    results.append(coco_det)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.log_freq == 0:
                print('Test: [{0:>4d}/{1}] '
                      'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) '.format(
                          i, len(loader), batch_time=batch_time,
                          rate_avg=input.size(0) / batch_time.avg))

    json.dump(results, open(args.results, 'w'), indent=4)
    if 'test' not in args.anno:
        coco_results = dataset.coco.loadRes(args.results)
        coco_eval = COCOeval(dataset.coco, coco_results, 'bbox')
        coco_eval.params.imgIds = img_ids  # score only ids we've used
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
    return results
def validate(args): setup_dllogger(0, filename=args.dllogger_file) if args.checkpoint != '': args.pretrained = True args.prefetcher = not args.no_prefetcher if args.waymo: assert args.waymo_val is not None memory_format = (torch.channels_last if args.memory_format == "nhwc" else torch.contiguous_format) args.distributed = False if 'WORLD_SIZE' in os.environ: args.distributed = int(os.environ['WORLD_SIZE']) > 1 args.device = 'cuda:0' args.world_size = 1 args.rank = 0 # global rank if args.distributed: torch.cuda.manual_seed_all(args.seed) args.device = 'cuda:%d' % args.local_rank torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group(backend='nccl', init_method='env://') args.world_size = torch.distributed.get_world_size() args.rank = torch.distributed.get_rank() # Set device limit on the current device # cudaLimitMaxL2FetchGranularity = 0x05 pValue = ctypes.cast((ctypes.c_int * 1)(), ctypes.POINTER(ctypes.c_int)) _libcudart.cudaDeviceSetLimit(ctypes.c_int(0x05), ctypes.c_int(128)) _libcudart.cudaDeviceGetLimit(pValue, ctypes.c_int(0x05)) assert pValue.contents.value == 128 assert args.rank >= 0 # create model bench = create_model(args.model, input_size=args.input_size, num_classes=args.num_classes, bench_task='predict', pretrained=args.pretrained, redundant_bias=args.redundant_bias, checkpoint_path=args.checkpoint, checkpoint_ema=args.use_ema, soft_nms=args.use_soft_nms, strict_load=False) input_size = bench.config.image_size data_config = bench.config param_count = sum([m.numel() for m in bench.parameters()]) print('Model %s created, param count: %d' % (args.model, param_count)) bench = bench.cuda().to(memory_format=memory_format) if args.distributed > 1: raise ValueError( "Evaluation is supported only on single GPU. args.num_gpu must be 1" ) bench = DDP( bench, device_ids=[args.device] ) # torch.nn.DataParallel(bench, device_ids=list(range(args.num_gpu))) if args.waymo: annotation_path = args.waymo_val_annotation image_dir = args.waymo_val else: if 'test' in args.anno: annotation_path = os.path.join(args.data, 'annotations', f'image_info_{args.anno}.json') image_dir = 'test2017' else: annotation_path = os.path.join(args.data, 'annotations', f'instances_{args.anno}.json') image_dir = args.anno dataset = CocoDetection(os.path.join(args.data, image_dir), annotation_path, data_config) evaluator = COCOEvaluator(dataset.coco, distributed=args.distributed, waymo=args.waymo) loader = create_loader(dataset, input_size=input_size, batch_size=args.batch_size, use_prefetcher=args.prefetcher, interpolation=args.interpolation, fill_color=args.fill_color, num_workers=args.workers, distributed=args.distributed, pin_mem=args.pin_mem, memory_format=memory_format) img_ids = [] results = [] dllogger_metric = {} bench.eval() batch_time = AverageMeter() throughput = AverageMeter() end = time.time() total_time_start = time.time() with torch.no_grad(): for i, (input, target) in enumerate(loader): with torch.cuda.amp.autocast(enabled=args.amp): output = bench(input, target['img_scale'], target['img_size']) batch_time.update(time.time() - end) throughput.update(input.size(0) / batch_time.val) evaluator.add_predictions(output, target) torch.cuda.synchronize() # measure elapsed time if i == 9: batch_time.reset() throughput.reset() if args.rank == 0 and i % args.log_freq == 0: print( 'Test: [{0:>4d}/{1}] ' 'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) ' .format( i, len(loader), batch_time=batch_time, rate_avg=input.size(0) / batch_time.avg, )) end = time.time() 
dllogger_metric['total_inference_time'] = time.time() - total_time_start dllogger_metric['inference_throughput'] = throughput.avg dllogger_metric['inference_time'] = 1000 / throughput.avg total_time_start = time.time() mean_ap = 0. if not args.inference: if 'test' not in args.anno: mean_ap = evaluator.evaluate() else: evaluator.save_predictions(args.results) dllogger_metric['map'] = mean_ap dllogger_metric['total_eval_time'] = time.time() - total_time_start else: evaluator.save_predictions(args.results) if not args.distributed or args.rank == 0: dllogger.log(step=(), data=dllogger_metric, verbosity=0) return results
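# For reference, a minimal AverageMeter compatible with how the timing and
# accuracy loops in these scripts use it (.update(), .val, .avg, .reset());
# this mirrors the common timm-style helper but is written here as an
# illustrative sketch, not the imported implementation.
class AverageMeter:
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        # val is the latest measurement, n the number of samples it covers
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / max(self.count, 1)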
def validate(args): setup_default_logging() def setthresh(): # per-model thresholds keyed by checkpoint name prefix, else a uniform fallback key = os.path.basename(args.checkpoint).split("_")[0] if key in getthresholds: return getthresholds[key] return [args.threshold] * 4 # might as well try to validate something args.pretrained = args.pretrained or not args.checkpoint args.prefetcher = not args.no_prefetcher # create model bench = create_model( args.model, bench_task='predict', pretrained=args.pretrained, redundant_bias=args.redundant_bias, checkpoint_path=args.checkpoint, checkpoint_ema=args.use_ema, ) input_size = bench.config.image_size param_count = sum([m.numel() for m in bench.parameters()]) print('Model %s created, param count: %d' % (args.model, param_count)) bench = bench.cuda() if has_amp: print('Using AMP mixed precision.') bench = amp.initialize(bench, opt_level='O1') else: print('AMP not installed, running network in FP32.') if args.num_gpu > 1: bench = torch.nn.DataParallel(bench, device_ids=list(range(args.num_gpu))) if 'test' in args.anno: annotation_path = os.path.join(args.data, 'annotations', f'image_info_{args.anno}.json') image_dir = args.anno elif 'val' in args.anno: annotation_path = os.path.join(args.data, 'annotations', f'instances_{args.anno}.json') image_dir = args.anno else: raise ValueError('Unsupported annotation set: %s' % args.anno) print(os.path.join(args.data, image_dir), annotation_path) dataset = CocoDetection(os.path.join(args.data, image_dir), annotation_path) loader = create_loader(dataset, input_size=input_size, batch_size=args.batch_size, use_prefetcher=args.prefetcher, interpolation=args.interpolation, fill_color=args.fill_color, num_workers=args.workers, pin_mem=args.pin_mem, mean=args.mean, std=args.std) if 'test' in args.anno: threshold = float(args.threshold) else: threshold = .001 img_ids = [] results = [] bench.eval() batch_time = AverageMeter() end = time.time() with torch.no_grad(): for i, (input, target) in enumerate(loader): output = bench(input, target['img_scale'], target['img_size']) output = output.cpu() sample_ids = target['img_id'].cpu() for index, sample in enumerate(output): image_id = int(sample_ids[index]) for det in sample: score = float(det[4]) if score < threshold: # stop when below this threshold, scores in descending order; record a sentinel so the image still appears in the results coco_det = dict(image_id=image_id, category_id=-1) img_ids.append(image_id) results.append(coco_det) break coco_det = dict(image_id=image_id, bbox=det[0:4].tolist(), score=score, category_id=int(det[5]), sizes=target['img_size'].tolist()[0]) img_ids.append(image_id) results.append(coco_det) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.log_freq == 0: print( 'Test: [{0:>4d}/{1}] ' 'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) ' .format( i, len(loader), batch_time=batch_time, rate_avg=input.size(0) / batch_time.avg, )) if not os.path.exists(args.tosave): os.makedirs(args.tosave) from itertools import groupby results.sort(key=lambda x: x['image_id']) count = 0 for k, v in tqdm(groupby(results, key=lambda x: x['image_id'])): img = drawonimage( os.path.join(args.data, image_dir, str(getimageNamefromid(k))), v, setthresh()) cv2.imwrite(os.path.join(args.tosave, str(getimageNamefromid(k))), img) count += 1 print('generated predictions for %d images.' % count) return results
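# The drawing helper used above (drawonimage) is repo-specific; a minimal
# OpenCV-based equivalent might look like the sketch below. The box format is
# assumed to be COCO-style [x, y, w, h] as produced by the result dicts built
# in the loop, and the threshold list is assumed indexable per category.
import cv2

def draw_detections(image_path, dets, thresholds, out_path):
    img = cv2.imread(image_path)
    for det in dets:
        cat = det['category_id']
        # skip sentinel entries and low-scoring boxes
        if cat < 0 or det['score'] < thresholds[min(cat, len(thresholds) - 1)]:
            continue
        x, y, w, h = (int(v) for v in det['bbox'])
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(img, '%d:%.2f' % (cat, det['score']), (x, max(y - 4, 0)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    cv2.imwrite(out_path, img)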
def main(): setup_default_logging() args, args_text = _parse_args() args.prefetcher = not args.no_prefetcher args.distributed = False if 'WORLD_SIZE' in os.environ: args.distributed = int(os.environ['WORLD_SIZE']) > 1 args.device = 'cuda:0' args.world_size = 1 args.rank = 0 # global rank if args.distributed: args.device = 'cuda:%d' % args.local_rank torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group(backend='nccl', init_method='env://') args.world_size = torch.distributed.get_world_size() args.rank = torch.distributed.get_rank() assert args.rank >= 0 if args.distributed: logging.info( 'Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.' % (args.rank, args.world_size)) else: logging.info('Training with a single process on 1 GPU.') #torch.manual_seed(args.seed + args.rank) # create model config = get_efficientdet_config(args.model) config.redundant_bias = args.redundant_bias # redundant conv + BN bias layers (True to match official models) model = EfficientDet(config) if args.initial_checkpoint: load_checkpoint(model, args.initial_checkpoint) # rebuild the classification head for 5 classes (9 anchors * 5 classes outputs) config.num_classes = 5 model.class_net.predict.conv_pw = create_conv2d(config.fpn_channels, 9 * 5, 1, padding=config.pad_type, bias=True) variance_scaling(model.class_net.predict.conv_pw.weight) model.class_net.predict.conv_pw.bias.data.fill_(-math.log((1 - 0.01) / 0.01)) model = DetBenchTrain(model, config) model.cuda() print(model.model.class_net.predict.conv_pw) # debug: confirm the rebuilt head # FIXME create model factory, pretrained zoo # model = create_model( # args.model, # pretrained=args.pretrained, # num_classes=args.num_classes, # drop_rate=args.drop, # drop_connect_rate=args.drop_connect, # DEPRECATED, use drop_path # drop_path_rate=args.drop_path, # drop_block_rate=args.drop_block, # global_pool=args.gp, # bn_tf=args.bn_tf, # bn_momentum=args.bn_momentum, # bn_eps=args.bn_eps, # checkpoint_path=args.initial_checkpoint) if args.local_rank == 0: logging.info('Model %s created, param count: %d' % (args.model, sum([m.numel() for m in model.parameters()]))) optimizer = create_optimizer(args, model) use_amp = False if has_apex and args.amp: model, optimizer = amp.initialize(model, optimizer, opt_level='O1') use_amp = True if args.local_rank == 0: logging.info('NVIDIA APEX {}. AMP {}.'.format( 'installed' if has_apex else 'not installed', 'on' if use_amp else 'off')) # optionally resume from a checkpoint resume_state = {} resume_epoch = None if args.resume: resume_state, resume_epoch = resume_checkpoint(_unwrap_bench(model), args.resume) if resume_state and not args.no_resume_opt: if 'optimizer' in resume_state: if args.local_rank == 0: logging.info('Restoring Optimizer state from checkpoint') optimizer.load_state_dict(resume_state['optimizer']) if use_amp and 'amp' in resume_state and 'load_state_dict' in amp.__dict__: if args.local_rank == 0: logging.info('Restoring NVIDIA AMP state from checkpoint') amp.load_state_dict(resume_state['amp']) del resume_state model_ema = None if args.model_ema: # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper model_ema = ModelEma(model, decay=args.model_ema_decay, device='cpu' if args.model_ema_force_cpu else '') #resume=args.resume) # FIXME bit of a mess with bench if args.resume: load_checkpoint(_unwrap_bench(model_ema), args.resume, use_ema=True) if args.distributed: if args.sync_bn: try: if has_apex: model = convert_syncbn_model(model) else: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm( model) if args.local_rank == 0: logging.info( 'Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using ' 'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.' ) except Exception: logging.error( 'Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1' ) if has_apex: model = DDP(model, delay_allreduce=True) else: if args.local_rank == 0: logging.info( "Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP." ) model = DDP(model, device_ids=[args.local_rank]) # can use device str in Torch >= 1.1 # NOTE: EMA model does not need to be wrapped by DDP lr_scheduler, num_epochs = create_scheduler(args, optimizer) start_epoch = 0 if args.start_epoch is not None: # a specified start_epoch will always override the resume epoch start_epoch = args.start_epoch elif resume_epoch is not None: start_epoch = resume_epoch if lr_scheduler is not None and start_epoch > 0: lr_scheduler.step(start_epoch) if args.local_rank == 0: logging.info('Scheduled epochs: {}'.format(num_epochs)) train_anno_set = 'train_small' train_annotation_path = os.path.join(args.data, 'annotations_small', 'train_annotations.json') train_image_dir = train_anno_set dataset_train = CocoDetection(os.path.join(args.data, train_image_dir), train_annotation_path) # FIXME cutmix/mixup worth investigating?
# collate_fn = None # if args.prefetcher and args.mixup > 0: # collate_fn = FastCollateMixup(args.mixup, args.smoothing, args.num_classes) loader_train = create_loader( dataset_train, input_size=config.image_size, batch_size=args.batch_size, is_training=True, use_prefetcher=args.prefetcher, #re_prob=args.reprob, # FIXME add back various augmentations #re_mode=args.remode, #re_count=args.recount, #re_split=args.resplit, #color_jitter=args.color_jitter, #auto_augment=args.aa, interpolation=args.train_interpolation, #mean=data_config['mean'], #std=data_config['std'], num_workers=args.workers, distributed=args.distributed, #collate_fn=collate_fn, pin_mem=args.pin_mem, ) # evaluate on the held-out split, not the training set eval_anno_set = 'valid_small' eval_annotation_path = os.path.join(args.data, 'annotations_small', 'valid_annotations.json') eval_image_dir = eval_anno_set dataset_eval = CocoDetection(os.path.join(args.data, eval_image_dir), eval_annotation_path) loader_eval = create_loader( dataset_eval, input_size=config.image_size, batch_size=args.validation_batch_size_multiplier * args.batch_size, is_training=False, use_prefetcher=args.prefetcher, interpolation=args.interpolation, #mean=data_config['mean'], #std=data_config['std'], num_workers=args.workers, #distributed=args.distributed, pin_mem=args.pin_mem, ) evaluator = COCOEvaluator(dataset_eval.coco, distributed=args.distributed) eval_metric = args.eval_metric best_metric = None best_epoch = None saver = None output_dir = '' if args.local_rank == 0: output_base = args.output if args.output else './output' exp_name = '-'.join( [datetime.now().strftime("%Y%m%d-%H%M%S"), args.model]) output_dir = get_outdir(output_base, 'train', exp_name) decreasing = True if eval_metric == 'loss' else False saver = CheckpointSaver(checkpoint_dir=output_dir, decreasing=decreasing) with open(os.path.join(output_dir, 'args.yaml'), 'w') as f: f.write(args_text) try: for epoch in range(start_epoch, num_epochs): if args.distributed: loader_train.sampler.set_epoch(epoch) train_metrics = train_epoch(epoch, model, loader_train, optimizer, args, lr_scheduler=lr_scheduler, saver=saver, output_dir=output_dir, use_amp=use_amp, model_ema=model_ema) if args.distributed and args.dist_bn in ('broadcast', 'reduce'): if args.local_rank == 0: logging.info( "Distributing BatchNorm running means and vars") distribute_bn(model, args.world_size, args.dist_bn == 'reduce') eval_metrics = validate(model, loader_eval, args, evaluator) if model_ema is not None and not args.model_ema_force_cpu: if args.distributed and args.dist_bn in ('broadcast', 'reduce'): distribute_bn(model_ema, args.world_size, args.dist_bn == 'reduce') ema_eval_metrics = validate(model_ema.ema, loader_eval, args, evaluator, log_suffix=' (EMA)') eval_metrics = ema_eval_metrics if lr_scheduler is not None: # step LR for next epoch lr_scheduler.step(epoch + 1, eval_metrics[eval_metric]) if saver is not None: update_summary(epoch, train_metrics, eval_metrics, os.path.join(output_dir, 'summary.csv'), write_header=best_metric is None) # save proper checkpoint with eval metric save_metric = eval_metrics[eval_metric] best_metric, best_epoch = saver.save_checkpoint( _unwrap_bench(model), optimizer, args, epoch=epoch, model_ema=_unwrap_bench(model_ema), metric=save_metric, use_amp=use_amp) except KeyboardInterrupt: pass if best_metric is not None: logging.info('*** Best metric: {0} (epoch {1})'.format( best_metric, best_epoch))
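# Sketch of the exponential-moving-average bookkeeping that ModelEma performs
# each optimizer step (assuming the standard update ema = decay * ema +
# (1 - decay) * param). The real timm/effdet ModelEma also handles device
# placement and checkpoint (de)serialization, which this illustration omits.
import copy
import torch

class SimpleModelEma:
    def __init__(self, model, decay=0.9998):
        # keep a frozen deep copy whose weights trail the live model
        self.ema = copy.deepcopy(model).eval()
        self.decay = decay
        for p in self.ema.parameters():
            p.requires_grad_(False)

    @torch.no_grad()
    def update(self, model):
        ema_sd, model_sd = self.ema.state_dict(), model.state_dict()
        for k in ema_sd:
            # only float tensors are averaged; int buffers (e.g. BN counters) are left alone
            if ema_sd[k].dtype.is_floating_point:
                ema_sd[k].mul_(self.decay).add_(model_sd[k], alpha=1.0 - self.decay)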
def main(): args = parser.parse_args() print(args) if args.img_size is None: args.img_size, args.crop_pct = get_image_size_crop_pct(args.model) if not args.checkpoint and not args.pretrained: args.pretrained = True if args.torchscript: geffnet.config.set_scriptable(True) # create model model = geffnet.create_model(args.model, num_classes=args.num_classes, in_chans=3, pretrained=args.pretrained, checkpoint_path=args.checkpoint) if args.torchscript: torch.jit.optimized_execution(True) model = torch.jit.script(model) print('Model %s created, param count: %d' % (args.model, sum([m.numel() for m in model.parameters()]))) data_config = resolve_data_config(model, args) criterion = nn.CrossEntropyLoss() if not args.no_cuda: if args.num_gpu > 1: model = torch.nn.DataParallel(model, device_ids=list(range( args.num_gpu))).cuda() else: model = model.cuda() criterion = criterion.cuda() if args.tune: model.eval() model.fuse_model() conf_yaml = "conf_" + args.model + ".yaml" from lpot import Quantization quantizer = Quantization(conf_yaml) q_model = quantizer(model) exit(0) valdir = os.path.join(args.data, 'val') loader = create_loader(Dataset(valdir, load_bytes=args.tf_preprocessing), input_size=data_config['input_size'], batch_size=args.batch_size, use_prefetcher=not args.no_cuda, interpolation=data_config['interpolation'], mean=data_config['mean'], std=data_config['std'], num_workers=args.workers, crop_pct=data_config['crop_pct'], tensorflow_preprocessing=args.tf_preprocessing) batch_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() model.eval() model.fuse_model() if args.int8: from lpot.utils.pytorch import load new_model = load( os.path.abspath(os.path.expanduser(args.tuned_checkpoint)), model) else: new_model = model with torch.no_grad(): for i, (input, target) in enumerate(loader): if i >= args.warmup_iterations: start = time.time() if not args.no_cuda: target = target.cuda() input = input.cuda() # compute output output = new_model(input) loss = criterion(output, target) # measure accuracy and record loss prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) losses.update(loss.item(), input.size(0)) top1.update(prec1.item(), input.size(0)) top5.update(prec5.item(), input.size(0)) if i >= args.warmup_iterations: # measure elapsed time batch_time.update(time.time() - start) if i % args.print_freq == 0: print( 'Test: [{0}/{1}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f}, {rate_avg:.3f}/s) \t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( i, len(loader), batch_time=batch_time, rate_avg=input.size(0) / batch_time.avg, loss=losses, top1=top1, top5=top5)) if args.iterations > 0 and i >= args.iterations + args.warmup_iterations - 1: break print('Batch size = %d' % args.batch_size) if args.batch_size == 1: print('Latency: %.3f ms' % (batch_time.avg * 1000)) print('Throughput: %.3f images/sec' % (args.batch_size / batch_time.avg)) print('Accuracy: {top1:.5f} Accuracy@5 {top5:.5f}'.format( top1=(top1.avg / 100), top5=(top5.avg / 100)))
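# LPOT drives PyTorch's post-training quantization under the hood; the
# eager-mode flow it automates looks roughly like this sketch, assuming a
# model exposing fuse_model() as above and a calibration data loader. This is
# an illustration of the general fuse -> prepare -> calibrate -> convert
# recipe, not the LPOT implementation itself.
import torch

def quantize_ptq(model, calib_loader, num_batches=10):
    model.eval()
    model.fuse_model()  # fuse conv+bn+relu blocks before observer insertion
    model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
    torch.quantization.prepare(model, inplace=True)
    with torch.no_grad():  # run a few batches so the observers collect ranges
        for i, (x, _) in enumerate(calib_loader):
            model(x)
            if i + 1 >= num_batches:
                break
    torch.quantization.convert(model, inplace=True)
    return model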
def validate(args): # might as well try to validate something args.pretrained = args.pretrained or not args.checkpoint args.prefetcher = not args.no_prefetcher args.redundant_bias = not args.no_redundant_bias # create model config = get_efficientdet_config(args.model) config.redundant_bias = args.redundant_bias model = EfficientDet(config) if args.checkpoint: load_checkpoint(model, args.checkpoint) param_count = sum([m.numel() for m in model.parameters()]) print('Model %s created, param count: %d' % (args.model, param_count)) bench = DetBenchEval(model, config) bench = bench.cuda() if has_amp: print('Using AMP mixed precision.') bench = amp.initialize(bench, opt_level='O1') else: print('AMP not installed, running network in FP32.') if args.num_gpu > 1: bench = torch.nn.DataParallel(bench, device_ids=list(range(args.num_gpu))) if 'test' in args.anno: annotation_path = os.path.join(args.data, 'annotations', f'image_info_{args.anno}.json') image_dir = 'test2017' else: annotation_path = os.path.join(args.data, 'annotations', f'instances_{args.anno}.json') image_dir = args.anno dataset = CocoDetection(os.path.join(args.data, image_dir), annotation_path) loader = create_loader(dataset, input_size=config.image_size, batch_size=args.batch_size, use_prefetcher=args.prefetcher, interpolation=args.interpolation, fill_color=args.fill_color, num_workers=args.workers, pin_mem=args.pin_mem) img_ids = [] results = [] model.eval() batch_time = AverageMeter() end = time.time() with torch.no_grad(): for i, (input, target) in enumerate(loader): output = bench(input, target['scale']) output = output.cpu() sample_ids = target['img_id'].cpu() for index, sample in enumerate(output): image_id = int(sample_ids[index]) for det in sample: score = float(det[4]) if score < .001: # stop when below this threshold, scores in descending order break coco_det = dict(image_id=image_id, bbox=det[0:4].tolist(), score=score, category_id=int(det[5])) img_ids.append(image_id) results.append(coco_det) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.log_freq == 0: print( 'Test: [{0:>4d}/{1}] ' 'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) ' .format( i, len(loader), batch_time=batch_time, rate_avg=input.size(0) / batch_time.avg, )) json.dump(results, open(args.results, 'w'), indent=4) if 'test' not in args.anno: coco_results = dataset.coco.loadRes(args.results) coco_eval = COCOeval(dataset.coco, coco_results, 'bbox') coco_eval.params.imgIds = img_ids # score only ids we've used coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() return results
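# The per-sample decode loop above relies on detections being sorted by
# descending score so it can stop at the first row below threshold. In sketch
# form (rows assumed [x, y, w, h, score, class], matching the indexing in the
# code; the function name is illustrative):
def decode_detections(sample, image_id, score_thresh=0.001):
    dets = []
    for det in sample:
        score = float(det[4])
        if score < score_thresh:
            break  # every remaining row has a lower score
        dets.append(dict(image_id=image_id,
                         bbox=det[0:4].tolist(),
                         score=score,
                         category_id=int(det[5])))
    return dets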
def eval_model(model_name, paper_model_name, paper_arxiv_id, batch_size=64, model_description=''): # create model bench = create_model( model_name, bench_task='predict', pretrained=True, ) bench.eval() input_size = bench.config.image_size param_count = sum([m.numel() for m in bench.parameters()]) print('Model %s created, param count: %d' % (model_name, param_count)) bench = bench.cuda() if has_amp: print('Using AMP mixed precision.') bench = amp.initialize(bench, opt_level='O1') else: print('AMP not installed, running network in FP32.') annotation_path = os.path.join(DATA_ROOT, 'annotations', f'instances_{ANNO_SET}.json') evaluator = COCOEvaluator( root=DATA_ROOT, model_name=paper_model_name, model_description=model_description, paper_arxiv_id=paper_arxiv_id) dataset = CocoDetection(os.path.join(DATA_ROOT, ANNO_SET), annotation_path) loader = create_loader( dataset, input_size=input_size, batch_size=batch_size, use_prefetcher=True, fill_color='mean', num_workers=4, pin_mem=True) iterator = tqdm.tqdm(loader, desc="Evaluation", mininterval=5) evaluator.reset_time() with torch.no_grad(): for i, (input, target) in enumerate(iterator): output = bench(input, target['img_scale'], target['img_size']) output = output.cpu() sample_ids = target['img_id'].cpu() results = [] for index, sample in enumerate(output): image_id = int(sample_ids[index]) for det in sample: score = float(det[4]) if score < .001: # stop when below this threshold, scores in descending order break coco_det = dict( image_id=image_id, bbox=det[0:4].tolist(), score=score, category_id=int(det[5])) results.append(coco_det) evaluator.add(results) if evaluator.cache_exists: break evaluator.save()
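# The has_amp / has_apex guards used throughout these scripts typically come
# from an import probe like the following (a sketch of the common pattern;
# maybe_amp is an illustrative helper, not part of the repo). Calling
# apex.amp.initialize without an optimizer is the inference-only form used in
# the eval functions above.
try:
    from apex import amp
    has_amp = True
except ImportError:
    has_amp = False

def maybe_amp(model, opt_level='O1'):
    # returns the model wrapped for mixed precision when apex is available,
    # otherwise the unmodified FP32 model
    return amp.initialize(model, opt_level=opt_level) if has_amp else model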
def main(): args = parser.parse_args() # create model model = create_model( args.model, num_classes=args.num_classes, in_chans=3, pretrained=args.pretrained, checkpoint_path=args.checkpoint) print('Model %s created, param count: %d' % (args.model, sum([m.numel() for m in model.parameters()]))) config = resolve_data_config(model, args) model, test_time_pool = apply_test_time_pool(model, config, args) if args.num_gpu > 1: model = torch.nn.DataParallel(model, device_ids=list(range(args.num_gpu))).cuda() else: model = model.cuda() loader = create_loader( Dataset(args.data), input_size=config['input_size'], batch_size=args.batch_size, use_prefetcher=True, interpolation=config['interpolation'], mean=config['mean'], std=config['std'], num_workers=args.workers, crop_pct=1.0 if test_time_pool else config['crop_pct']) model.eval() k = min(args.topk, args.num_classes) batch_time = AverageMeter() end = time.time() topk_ids = [] with torch.no_grad(): for batch_idx, (input, _) in enumerate(loader): input = input.cuda() labels = model(input) topk = labels.topk(k)[1] topk_ids.append(topk.cpu().numpy()) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if batch_idx % args.print_freq == 0: print('Predict: [{0}/{1}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})'.format( batch_idx, len(loader), batch_time=batch_time)) topk_ids = np.concatenate(topk_ids, axis=0) # keep rows 2-D even when k == 1 with open(os.path.join(args.output_dir, 'topk_ids.csv'), 'w') as out_file: filenames = loader.dataset.filenames() for filename, label in zip(filenames, topk_ids): # write one row per image with however many top-k ids were requested out_file.write('{0},{1}\n'.format( os.path.basename(filename), ','.join(str(v) for v in label)))
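# Equivalent top-k CSV writing using the csv module, generalized to any k
# (a sketch; write_topk_csv is an illustrative helper, with filenames and
# topk_ids shaped as produced by the loop above).
import csv
import os

def write_topk_csv(filenames, topk_ids, out_dir, name='topk_ids.csv'):
    with open(os.path.join(out_dir, name), 'w', newline='') as f:
        writer = csv.writer(f)
        for fname, ids in zip(filenames, topk_ids):
            # one row per image: basename followed by its top-k class ids
            writer.writerow([os.path.basename(fname), *ids.tolist()])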
def validate(args): setup_default_logging() # might as well try to validate something args.pretrained = args.pretrained or not args.checkpoint args.prefetcher = not args.no_prefetcher # create model bench = create_model(args.model, bench_task='predict', pretrained=args.pretrained, redundant_bias=args.redundant_bias, checkpoint_path=args.checkpoint, checkpoint_ema=args.use_ema) input_size = bench.config.image_size param_count = sum([m.numel() for m in bench.parameters()]) print('Model %s created, param count: %d' % (args.model, param_count)) bench = bench.cuda() if has_amp: print('Using AMP mixed precision.') bench = amp.initialize(bench, opt_level='O1') else: print('AMP not installed, running network in FP32.') if args.num_gpu > 1: bench = torch.nn.DataParallel(bench, device_ids=list(range(args.num_gpu))) if 'test' in args.anno: annotation_path = os.path.join(args.data, 'annotations', f'image_info_{args.anno}.json') image_dir = args.anno else: annotation_path = os.path.join(args.data, 'annotations', f'instances_{args.anno}.json') image_dir = args.anno dataset = CocoDetection(os.path.join(args.data, image_dir), annotation_path) loader = create_loader(dataset, input_size=input_size, batch_size=args.batch_size, use_prefetcher=args.prefetcher, interpolation=args.interpolation, fill_color=args.fill_color, num_workers=args.workers, mean=args.mean, std=args.std, pin_mem=args.pin_mem) results = [] bench.eval() # grab a single batch and reuse it for latency benchmarking for input, target in loader: dummy_input = input img_scale = target['img_scale'] img_size = target['img_size'] break starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True) num_iters = 2000 with torch.no_grad(): # GPU warm-up for _ in range(10): _ = bench(dummy_input, img_scale, img_size) print("starting") batch_time = AverageMeter() # measure performance: CUDA events time the GPU work; synchronize before reading elapsed_time for it in range(num_iters): starter.record() _ = bench(dummy_input, img_scale, img_size) ender.record() torch.cuda.synchronize() batch_time.update(starter.elapsed_time(ender)) # milliseconds if it % args.log_freq == 0: print( 'Benchmark: [{0:>4d}/{1}] ' 'Time: {batch_time.val:.3f}ms ({batch_time.avg:.3f}ms, {rate_avg:>7.2f}/s) ' .format( it, num_iters, batch_time=batch_time, rate_avg=dummy_input.size(0) * 1000.0 / batch_time.avg, )) return results
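# Distilled version of the CUDA-event timing pattern used above: events time
# the GPU work itself, with a host synchronize only before each elapsed_time
# read. A sketch; cuda_time_ms is an illustrative helper and fn is any
# callable that launches CUDA work.
import torch

def cuda_time_ms(fn, warmup=10, iters=100):
    for _ in range(warmup):  # warm-up so allocator/cudnn autotune costs are excluded
        fn()
    torch.cuda.synchronize()
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    total = 0.0
    for _ in range(iters):
        start.record()
        fn()
        end.record()
        torch.cuda.synchronize()  # elapsed_time requires both events to have completed
        total += start.elapsed_time(end)  # milliseconds
    return total / iters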