def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--arch", default="resnet18", type=str)
    parser.add_argument("-d", "--data", default=None, type=str)
    parser.add_argument("-s", "--save", default="/data/models", type=str)
    parser.add_argument(
        "-c",
        "--checkpoint",
        default=None,
        type=str,
        help="pretrained model to finetune",
    )
    parser.add_argument(
        "-m",
        "--mode",
        default="qat",
        type=str,
        choices=["normal", "qat", "quantized"],
        help="Quantization Mode\n"
        "normal: no quantization, using float32\n"
        "qat: quantization aware training, simulate int8\n"
        "quantized: convert model to int8 quantized, inference only",
    )
    parser.add_argument("-n", "--ngpus", default=None, type=int)
    parser.add_argument("-w", "--workers", default=4, type=int)
    parser.add_argument("--report-freq", default=50, type=int)
    args = parser.parse_args()

    world_size = (
        dist.helper.get_device_count_by_fork("gpu")
        if args.ngpus is None
        else args.ngpus
    )
    world_size = 1 if world_size == 0 else world_size
    test_proc = dist.launcher(worker) if world_size > 1 else worker
    test_proc(world_size, args)
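# Hedged usage sketch for the entry point above. The script name "test.py" and
# the paths are illustrative assumptions; the flags come from the parser.
#   python3 test.py -a resnet18 -m quantized -c /path/to/checkpoint.pkl -d /path/to/imagenet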
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--arch", default="resnet18", type=str)
    parser.add_argument("-d", "--data", default=None, type=str)
    parser.add_argument("-s", "--save", default="/data/models", type=str)
    parser.add_argument(
        "-c",
        "--checkpoint",
        default=None,
        type=str,
        help="pretrained model to finetune",
    )
    parser.add_argument("-n", "--ngpus", default=None, type=int)
    parser.add_argument("-w", "--workers", default=4, type=int)
    parser.add_argument("--report-freq", default=50, type=int)
    args = parser.parse_args()

    world_size = (
        dist.helper.get_device_count_by_fork("gpu")
        if args.ngpus is None
        else args.ngpus
    )
    world_size = 1 if world_size == 0 else world_size
    if world_size != 1:
        logger.warning(
            "Calibration only supports single GPU now, %d provided", world_size
        )
    proc_func = dist.launcher(worker) if world_size > 1 else worker
    proc_func(world_size, args)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--arch", default="resnet18", type=str)
    parser.add_argument("-d", "--data", default=None, type=str)
    parser.add_argument("-s", "--save", default="/data/models", type=str)
    parser.add_argument(
        "-m",
        "--mode",
        default="normal",
        type=str,
        choices=["normal", "qat"],
        help="Quantization Mode\n"
        "normal: no quantization, using float32\n"
        "qat: quantization aware training, simulate int8",
    )
    parser.add_argument("-n", "--ngpus", default=None, type=int)
    parser.add_argument("-w", "--workers", default=4, type=int)
    parser.add_argument("--report-freq", default=50, type=int)
    args = parser.parse_args()

    world_size = (
        dist.helper.get_device_count_by_fork("gpu")
        if args.ngpus is None
        else args.ngpus
    )
    if world_size == 0:
        raise ValueError("Training requires at least one GPU")
    train_proc = dist.launcher(worker) if world_size > 1 else worker
    train_proc(world_size, args)
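# Hedged usage sketch; "train.py" and the dataset path are assumptions, the
# flags come from the parser above.
#   python3 train.py -a resnet18 -m qat -d /path/to/imagenet -s /data/models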
def main():
    parser = argparse.ArgumentParser(description="MegEngine ImageNet Training")
    parser.add_argument("-d", "--data", metavar="DIR", help="path to imagenet dataset")
    parser.add_argument(
        "-a",
        "--arch",
        default="resnet50",
        help="model architecture (default: resnet50)",
    )
    parser.add_argument(
        "-n",
        "--ngpus",
        default=None,
        type=int,
        help="number of GPUs per node (default: None, use all available GPUs)",
    )
    parser.add_argument(
        "-m", "--model", metavar="PKL", default=None, help="path to model checkpoint"
    )
    parser.add_argument("-j", "--workers", default=2, type=int)
    parser.add_argument(
        "-p",
        "--print-freq",
        default=20,
        type=int,
        metavar="N",
        help="print frequency (default: 20)",
    )
    parser.add_argument("--dist-addr", default="localhost")
    parser.add_argument("--dist-port", default=23456, type=int)
    parser.add_argument("--world-size", default=1, type=int)
    parser.add_argument("--rank", default=0, type=int)
    args = parser.parse_args()

    if args.ngpus is None:
        args.ngpus = dist.helper.get_device_count_by_fork("gpu")

    if args.world_size * args.ngpus > 1:
        dist_worker = dist.launcher(
            master_ip=args.dist_addr,
            port=args.dist_port,
            world_size=args.world_size * args.ngpus,
            rank_start=args.rank * args.ngpus,
            n_gpus=args.ngpus,
        )(worker)
        dist_worker(args)
    else:
        worker(args)
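# Illustrative launches ("test.py" is an assumed script name; host names and
# paths are placeholders, flags come from the parser above):
#   python3 test.py -a resnet50 -d /path/to/imagenet -m /path/to/model.pkl
# Two nodes, all GPUs on each:
#   node 0: python3 test.py -d /path/to/imagenet --dist-addr host0 --world-size 2 --rank 0
#   node 1: python3 test.py -d /path/to/imagenet --dist-addr host0 --world-size 2 --rank 1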
def main():
    parser = make_parser()
    args = parser.parse_args()

    # ------------------------ begin training -------------------------- #
    logger.info("Device Count = %d", args.devices)

    log_dir = "log-of-{}".format(os.path.basename(args.file).split(".")[0])
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)

    if args.devices > 1:
        trainer = dist.launcher(worker, n_gpus=args.devices)
        trainer(args)
    else:
        worker(args)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f", "--file", default="net.py", type=str, help="net description file"
    )
    parser.add_argument(
        "-w", "--weight_file", default=None, type=str, help="weights file",
    )
    parser.add_argument(
        "-n", "--devices", default=1, type=int, help="total number of gpus for testing",
    )
    parser.add_argument(
        "-d", "--dataset_dir", default="/data/datasets", type=str,
    )
    args = parser.parse_args()

    current_network = import_from_file(args.file)
    cfg = current_network.Cfg()

    if args.devices > 1:
        dist_worker = dist.launcher(n_gpus=args.devices)(worker)
        result_list = dist_worker(current_network, args.weight_file, args.dataset_dir)
        result_list = sum(result_list, [])
    else:
        result_list = worker(current_network, args.weight_file, args.dataset_dir)

    if cfg.val_save_path is not None:
        save_results(result_list, cfg.val_save_path, cfg)
    logger.info("Start evaluation!")
    compute_metric(result_list, cfg)
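# Hedged usage sketch ("test.py" is an assumed script name; the weights path
# is a placeholder, flags come from the parser above):
#   python3 test.py -f net.py -w /path/to/weights.pkl -n 8 -d /data/datasets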
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-a",
        "--arch",
        default="simplebaseline_res50",
        type=str,
        choices=cfg.model_choices,
    )
    parser.add_argument("-s", "--save", default="/data/models", type=str)
    parser.add_argument("-b", "--batch_size", default=32, type=int)
    parser.add_argument("-lr", "--initial_lr", default=3e-4, type=float)
    parser.add_argument("--resume", default=None, type=str)
    parser.add_argument("--multi_scale_supervision", action="store_true")
    parser.add_argument("-n", "--ngpus", default=8, type=int)
    parser.add_argument("-w", "--workers", default=8, type=int)
    args = parser.parse_args()

    model_name = "{}_{}x{}".format(args.arch, cfg.input_shape[0], cfg.input_shape[1])
    save_dir = os.path.join(args.save, model_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    mge.set_log_file(os.path.join(save_dir, "log.txt"))

    if args.batch_size != cfg.batch_size:
        cfg.batch_size = args.batch_size
    if args.initial_lr != cfg.initial_lr:
        cfg.initial_lr = args.initial_lr

    if args.ngpus is None:
        args.ngpus = dist.helper.get_device_count_by_fork("gpu")

    if args.ngpus > 1:
        # scale weight decay with the number of gpus
        cfg.weight_decay *= args.ngpus
        dist_worker = dist.launcher(n_gpus=args.ngpus)(worker)
        dist_worker(args)
    else:
        worker(args)
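# Hedged usage sketch ("train.py" is an assumed script name; flags come from
# the parser above). Note that -b and -lr override cfg.batch_size and
# cfg.initial_lr when they differ from the config defaults.
#   python3 train.py -a simplebaseline_res50 -b 32 -lr 3e-4 -n 8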
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f", "--file", default="net.py", type=str, help="net description file"
    )
    parser.add_argument(
        "-n", "--devices", type=int, default=8, help="total number of gpus for training"
    )
    parser.add_argument(
        "-d", "--dataset_dir", type=str, default="/data/datasets",
    )
    parser.add_argument(
        "-r", "--resume", type=str, default=None, help="resume model file"
    )
    args = parser.parse_args()

    # ------------------------ begin training -------------------------- #
    logger.info("Device Count = %d", args.devices)

    log_dir = "log-of-{}".format(os.path.basename(args.file).split(".")[0])
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)

    if args.devices > 1:
        trainer = dist.launcher(worker, n_gpus=args.devices)
        trainer(args)
    else:
        worker(args)
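# Hedged usage sketch ("train.py" is an assumed script name; the checkpoint
# path is a placeholder, flags come from the parser above):
#   python3 train.py -f net.py -n 8 -d /data/datasets -r /path/to/checkpoint.pkl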
    manager = Manager(
        model=model,
        optimizer=optimizer,
        scheduler=scheduler,
        params=params,
        dataloaders=dataloaders,
        writer=writter,
        logger=logger,
    )

    # Train the model
    if rank == 0:
        logger.info("Starting training for {} epoch(s)".format(params.num_epochs))

    train_and_evaluate(model, manager)


if __name__ == '__main__':
    # Load the parameters from json file
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        json_path
    ), "No json configuration file found at {}".format(json_path)
    with open(json_path) as f:
        params = EasyDict(json.load(f))
    params.update(vars(args))

    train_proc = (
        dist.launcher(main) if mge.device.get_device_count("gpu") > 1 else main
    )
    train_proc(params)
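# A minimal params.json sketch for the loader above. Only num_epochs is known
# to be read here (via params.num_epochs); the second field is illustrative.
# {
#     "num_epochs": 90,
#     "learning_rate": 0.001
# }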
def main():
    parser = argparse.ArgumentParser(description="MegEngine NBNet")
    parser.add_argument(
        "-d", "--data", default="/data/sidd", metavar="DIR", help="path to sidd dataset"
    )
    parser.add_argument("--dnd", action='store_true', help="training for dnd benchmark")
    parser.add_argument(
        "-a",
        "--arch",
        default="NBNet",
    )
    parser.add_argument(
        "-n",
        "--ngpus",
        default=None,
        type=int,
        help="number of GPUs per node (default: None, use all available GPUs)",
    )
    parser.add_argument(
        "--save",
        metavar="DIR",
        default="output",
        help="path to save checkpoint and log",
    )
    parser.add_argument(
        "--epochs",
        default=70,
        type=int,
        help="number of total epochs to run (default: 70)",
    )
    parser.add_argument(
        "--steps_per_epoch",
        default=10000,
        type=int,
        help="number of steps for one epoch (default: 10000)",
    )
    parser.add_argument(
        "-b",
        "--batch-size",
        metavar="SIZE",
        default=32,
        type=int,
        help="total batch size (default: 32)",
    )
    parser.add_argument(
        "--lr",
        "--learning-rate",
        metavar="LR",
        default=2e-4,
        type=float,
        help="learning rate for single GPU (default: 0.0002)",
    )
    parser.add_argument(
        "--weight-decay", default=1e-8, type=float, help="weight decay"
    )
    parser.add_argument("-j", "--workers", default=8, type=int)
    parser.add_argument(
        "-p",
        "--print-freq",
        default=10,
        type=int,
        metavar="N",
        help="print frequency (default: 10)",
    )
    args = parser.parse_args()  # pylint: disable=unused-variable  # noqa: F841

    # get device count; fall back to all available GPUs when --ngpus is not given
    if args.ngpus:
        ngpus_per_node = args.ngpus
    else:
        ngpus_per_node = dist.helper.get_device_count_by_fork("gpu")

    # launch processes
    train_proc = dist.launcher(worker) if ngpus_per_node > 1 else worker
    train_proc(args)
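# Hedged usage sketch ("train.py" is an assumed script name; flags and
# defaults come from the parser above):
#   python3 train.py -d /data/sidd -a NBNet -n 8 -b 32 --lr 2e-4 --epochs 70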
def main():
    # pylint: disable=import-outside-toplevel,too-many-branches,too-many-statements
    from pycocotools.coco import COCO
    from pycocotools.cocoeval import COCOeval

    parser = make_parser()
    args = parser.parse_args()

    current_network = import_from_file(args.file)
    cfg = current_network.Cfg()

    if args.weight_file:
        args.start_epoch = args.end_epoch = -1
    else:
        if args.start_epoch == -1:
            args.start_epoch = cfg.max_epoch - 1
        if args.end_epoch == -1:
            args.end_epoch = args.start_epoch
        assert 0 <= args.start_epoch <= args.end_epoch < cfg.max_epoch

    for epoch_num in range(args.start_epoch, args.end_epoch + 1):
        if args.weight_file:
            weight_file = args.weight_file
        else:
            weight_file = "log-of-{}/epoch_{}.pkl".format(
                os.path.basename(args.file).split(".")[0], epoch_num
            )

        if args.devices > 1:
            dist_worker = dist.launcher(n_gpus=args.devices)(worker)
            result_list = dist_worker(current_network, weight_file, args.dataset_dir)
            result_list = sum(result_list, [])
        else:
            result_list = worker(current_network, weight_file, args.dataset_dir)

        all_results = DetEvaluator.format(result_list, cfg)
        if args.weight_file:
            json_path = "{}_{}.json".format(
                os.path.basename(args.file).split(".")[0],
                os.path.basename(args.weight_file).split(".")[0],
            )
        else:
            json_path = "log-of-{}/epoch_{}.json".format(
                os.path.basename(args.file).split(".")[0], epoch_num
            )
        all_results = json.dumps(all_results)

        with open(json_path, "w") as fo:
            fo.write(all_results)
        logger.info("Save results to %s, start evaluation!", json_path)

        eval_gt = COCO(
            os.path.join(
                args.dataset_dir, cfg.test_dataset["name"], cfg.test_dataset["ann_file"]
            )
        )
        eval_dt = eval_gt.loadRes(json_path)
        cocoEval = COCOeval(eval_gt, eval_dt, iouType="bbox")
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        metrics = [
            "AP",
            "AP@0.5",
            "AP@0.75",
            "APs",
            "APm",
            "APl",
            "AR@1",
            "AR@10",
            "AR@100",
            "ARs",
            "ARm",
            "ARl",
        ]
        logger.info("mmAP".center(32, "-"))
        for i, m in enumerate(metrics):
            logger.info("|\t%s\t|\t%.03f\t|", m, cocoEval.stats[i])
        logger.info("-" * 32)
def main():
    from pycocotools.coco import COCO
    from pycocotools.cocoeval import COCOeval

    parser = make_parser()
    args = parser.parse_args()

    model_name = "{}_{}x{}".format(args.arch, cfg.input_shape[0], cfg.input_shape[1])
    save_dir = os.path.join(args.save_dir, model_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    mge.set_log_file(os.path.join(save_dir, "log.txt"))

    args.ngpus = (
        dist.helper.get_device_count_by_fork("gpu")
        if args.ngpus is None
        else args.ngpus
    )
    cfg.batch_size = cfg.batch_size if args.batch_size is None else args.batch_size

    dt_path = os.path.join(cfg.data_root, "person_detection_results", args.dt_file)
    dets = json.load(open(dt_path, "r"))

    gt_path = os.path.join(
        cfg.data_root, "annotations", "person_keypoints_val2017.json"
    )
    eval_gt = COCO(gt_path)
    gt = eval_gt.dataset

    dets = [
        i for i in dets if (i["image_id"] in eval_gt.imgs and i["category_id"] == 1)
    ]
    ann_file = {"images": gt["images"], "annotations": dets}

    if args.end_epoch == -1:
        args.end_epoch = args.start_epoch

    for epoch_num in range(args.start_epoch, args.end_epoch + 1, args.test_freq):
        if args.model:
            model_file = args.model
        else:
            model_file = "{}/epoch_{}.pkl".format(args.model_dir, epoch_num)
        logger.info("Load Model : %s completed", model_file)

        dist_worker = dist.launcher(n_gpus=args.ngpus)(worker)
        all_results = dist_worker(args.arch, model_file, cfg.data_root, ann_file)
        all_results = sum(all_results, [])

        json_name = "log-of-{}_epoch_{}.json".format(args.arch, epoch_num)
        json_path = os.path.join(save_dir, json_name)
        all_results = json.dumps(all_results)
        with open(json_path, "w") as fo:
            fo.write(all_results)
        logger.info("Save to %s finished, start evaluation!", json_path)

        eval_dt = eval_gt.loadRes(json_path)
        cocoEval = COCOeval(eval_gt, eval_dt, iouType="keypoints")
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        metrics = [
            "AP",
            "AP@0.5",
            "AP@0.75",
            "APm",
            "APl",
            "AR",
            "AR@0.5",
            "AR@0.75",
            "ARm",
            "ARl",
        ]
        logger.info("mmAP".center(32, "-"))
        for i, m in enumerate(metrics):
            logger.info("|\t%s\t|\t%.03f\t|", m, cocoEval.stats[i])
        logger.info("-" * 32)
def main():
    parser = argparse.ArgumentParser(description="MegEngine ImageNet Training")
    parser.add_argument("-d", "--data", metavar="DIR", help="path to imagenet dataset")
    parser.add_argument(
        "-a",
        "--arch",
        default="resnet50",
        help="model architecture (default: resnet50)",
    )
    parser.add_argument(
        "-n",
        "--ngpus",
        default=None,
        type=int,
        help="number of GPUs per node (default: None, use all available GPUs)",
    )
    parser.add_argument(
        "--save",
        metavar="DIR",
        default="output",
        help="path to save checkpoint and log",
    )
    parser.add_argument(
        "--epochs",
        default=90,
        type=int,
        help="number of total epochs to run (default: 90)",
    )
    parser.add_argument(
        "-b",
        "--batch-size",
        metavar="SIZE",
        default=64,
        type=int,
        help="batch size for single GPU (default: 64)",
    )
    parser.add_argument(
        "--lr",
        "--learning-rate",
        metavar="LR",
        default=0.025,
        type=float,
        help="learning rate for single GPU (default: 0.025)",
    )
    parser.add_argument(
        "--momentum", default=0.9, type=float, help="momentum (default: 0.9)"
    )
    parser.add_argument(
        "--weight-decay", default=1e-4, type=float, help="weight decay (default: 1e-4)"
    )
    parser.add_argument("-j", "--workers", default=2, type=int)
    parser.add_argument(
        "-p",
        "--print-freq",
        default=20,
        type=int,
        metavar="N",
        help="print frequency (default: 20)",
    )
    parser.add_argument("--dist-addr", default="localhost")
    parser.add_argument("--dist-port", default=23456, type=int)
    parser.add_argument("--world-size", default=1, type=int)
    parser.add_argument("--rank", default=0, type=int)
    args = parser.parse_args()

    if args.ngpus is None:
        args.ngpus = dist.helper.get_device_count_by_fork("gpu")

    if args.world_size * args.ngpus > 1:
        dist_worker = dist.launcher(
            master_ip=args.dist_addr,
            port=args.dist_port,
            world_size=args.world_size * args.ngpus,
            rank_start=args.rank * args.ngpus,
            n_gpus=args.ngpus,
        )(worker)
        dist_worker(args)
    else:
        worker(args)
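# Illustrative launches ("train.py" is an assumed script name; host names and
# paths are placeholders, flags come from the parser above).
# Single node, all available GPUs:
#   python3 train.py -d /path/to/imagenet -a resnet50 -b 64 --lr 0.025
# Two nodes:
#   node 0: python3 train.py -d /path/to/imagenet --dist-addr host0 --world-size 2 --rank 0
#   node 1: python3 train.py -d /path/to/imagenet --dist-addr host0 --world-size 2 --rank 1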