def main(args):
    start_t = time.time()
    alexnet_module = alexnet()
    end_t = time.time()
    print("init time : {}".format(end_t - start_t))

    start_t = time.time()
    pretrain_models = flow.load(args.model_path)
    alexnet_module.load_state_dict(pretrain_models)
    end_t = time.time()
    print("load params time : {}".format(end_t - start_t))

    alexnet_module.eval()
    alexnet_module.to("cuda")

    start_t = time.time()
    image = load_image(args.image_path)
    image = flow.Tensor(image, device=flow.device("cuda"))
    predictions = alexnet_module(image).softmax()
    predictions = predictions.numpy()
    end_t = time.time()
    print("infer time : {}".format(end_t - start_t))

    clsidx = np.argmax(predictions)
    print("predict prob: %f, class name: %s" %
          (np.max(predictions), clsidx_2_labels[clsidx]))
def get_architecture(arch: str) -> torch.nn.Module:
    """ Return a neural network (with random weights)

    :param arch: the architecture - should be in the ARCHITECTURES list above
    :return: a Pytorch module
    """
    if arch == 'lenet':
        model = lenet()
    elif arch == 'alexnet':
        model = alexnet()
    elif arch == 'resnet20':
        model = resnet20()
    elif arch == 'resnet26':
        model = resnet26()
    elif arch == 'resnet32':
        model = resnet32()
    elif arch == 'resnet110':
        model = resnet110()
    elif arch == 'densenet':
        model = densenet_BC_cifar(depth=100, k=12)
    elif arch == 'vgg16':
        model = vgg16()
    elif arch == 'vgg19':
        model = vgg19()
    else:
        raise ValueError('arch not in ARCHITECTURES')
    return model
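# --- Hypothetical usage sketch (not part of the original file): shows how a caller
# might build a randomly-initialized network by name. Assumes the constructors used
# above (e.g. resnet20) are importable from this module.
if __name__ == '__main__':
    model = get_architecture('resnet20')
    n_params = sum(p.numel() for p in model.parameters())
    print('resnet20 parameter count:', n_params)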
def build_model(model_type=0):
    # build model
    model = alexnet(num_classes=cfg.class_number)
    model.cuda()
    criterion = myloss().cuda()
    # criterion = nn.functional.nll_loss().cuda()
    # criterion = nn.CrossEntropyLoss().cuda()
    return model, criterion
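# --- Hypothetical usage sketch (not part of the original file): wires build_model()
# into a single forward/backward step. Assumes cfg.class_number is defined and that
# myloss takes (logits, labels), like the commented-out CrossEntropyLoss alternative.
import torch

model, criterion = build_model()
images = torch.randn(8, 3, 224, 224).cuda()
labels = torch.randint(0, cfg.class_number, (8,)).cuda()
loss = criterion(model(images), labels)
loss.backward()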
def main(args):
    start_t = time.time()
    alexnet_module = alexnet()
    end_t = time.time()
    print("init time : {}".format(end_t - start_t))

    start_t = time.time()
    pretrain_models = flow.load(args.model_path)
    alexnet_module.load_state_dict(pretrain_models)
    end_t = time.time()
    print("load params time : {}".format(end_t - start_t))

    alexnet_module.eval()
    alexnet_module.to("cuda")

    class AlexNetEvalGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.alexnet = alexnet_module

        def build(self, image):
            with flow.no_grad():
                predictions = self.alexnet(image)
            return predictions

    alexnet_eval_graph = AlexNetEvalGraph()

    start_t = time.time()
    image = load_image(args.image_path)
    image = flow.Tensor(image, device=flow.device("cuda"))
    predictions = alexnet_eval_graph(image).softmax()
    predictions = predictions.numpy()
    end_t = time.time()
    print("infer time : {}".format(end_t - start_t))

    clsidx = np.argmax(predictions)
    print("predict prob: %f, class name: %s" %
          (np.max(predictions), clsidx_2_labels[clsidx]))
from profiling import Profiling
# from profiling import record

import model.alexnet as alexnet
import torch
import torch.nn as nn
from torch.autograd import Variable

# Iteration number
iter = 3

#
# Create model
#
model = alexnet.alexnet()

#
# Use case 1: use it as context-manager
#
# profiler will measure the following 3 iterations.
with Profiling(model) as p:
    for i in range(iter):
        # Forward:
        output = model.forward(
            Variable(torch.ones(2, 3, 224, 224), requires_grad=True))

        # Backward:
        grads = torch.ones(2, 1000)
        output.backward(grads)
device = torch.device("cuda" if (
    args.use_cuda and torch.cuda.is_available()) else "cpu")
torch.cuda.empty_cache()

####### Pick according model
if args.model_name == 'res50':
    model = resnet.resnet50().to(device)
elif args.model_name == 'res101':
    model = resnet.resnet101().to(device)
elif args.model_name == 'res152':
    model = resnet.resnet152().to(device)
elif args.model_name == 'res34':
    model = resnet.resnet34().to(device)
elif args.model_name == 'res18':
    model = resnet.resnet18().to(device)
elif args.model_name == 'alexnet':
    model = alexnet.alexnet().to(device)
else:
    print('Wrong Model Name')

########### Whether to parallel the model
if args.use_cuda:
    if args.parallel:
        model = nn.DataParallel(model)
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_num

############## Whether to load pretrained model
# if args.train_from_scratch:
#     pass
# else:
#     model.load_state_dict(torch.load(args.pretrained_model_path))
def setup(args):
    train_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="train",
        dataset_size=9469,
        batch_size=args.train_batch_size,
    )
    val_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="val",
        dataset_size=3925,
        batch_size=args.val_batch_size,
    )

    # model setup
    eager_model = alexnet()
    graph_model = alexnet()
    graph_model.load_state_dict(eager_model.state_dict())
    eager_model.to("cuda")
    graph_model.to("cuda")

    # optimizer setup
    eager_optimizer = flow.optim.SGD(eager_model.parameters(),
                                     lr=args.learning_rate,
                                     momentum=args.mom)
    graph_optimizer = flow.optim.SGD(graph_model.parameters(),
                                     lr=args.learning_rate,
                                     momentum=args.mom)

    # criterion setup
    criterion = flow.nn.CrossEntropyLoss()
    criterion = criterion.to("cuda")

    class ModelTrainGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.graph_model = graph_model
            self.criterion = criterion
            self.add_optimizer(graph_optimizer)

        def build(self, image, label):
            logits = self.graph_model(image)
            loss = self.criterion(logits, label)
            loss.backward()
            return loss

    class ModelEvalGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.graph_model = graph_model

        def build(self, image):
            with flow.no_grad():
                logits = self.graph_model(image)
                predictions = logits.softmax()
            return predictions

    model_train_graph = ModelTrainGraph()
    model_eval_graph = ModelEvalGraph()

    dic = {
        "train_dataloader": train_data_loader,
        "val_dataloader": val_data_loader,
        "eager": [eager_model, eager_optimizer, criterion],
        "graph": [graph_model, model_train_graph, model_eval_graph],
    }
    return dic
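# --- Hypothetical usage sketch (not part of the original file): shows how a caller
# is meant to unpack the dictionary returned by setup(); assumes the same parsed
# args namespace used above.
dic = setup(args)
train_loader = dic["train_dataloader"]
val_loader = dic["val_dataloader"]
eager_model, eager_optimizer, criterion = dic["eager"]
graph_model, model_train_graph, model_eval_graph = dic["graph"]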
def main(args):
    # Data Setup
    train_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="train",
        dataset_size=9469,
        batch_size=args.train_batch_size,
    )
    val_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="val",
        dataset_size=3925,
        batch_size=args.val_batch_size,
    )

    # Model Setup
    print("***** Initialization *****")
    start_t = time.time()
    model = alexnet()
    if args.load_checkpoint != "":
        print("load_checkpoint >>>>>>>>> ", args.load_checkpoint)
        model.load_state_dict(flow.load(args.load_checkpoint))
    end_t = time.time()
    print("init time : {}".format(end_t - start_t))

    # Training Setup
    criterion = flow.nn.CrossEntropyLoss()
    model.to("cuda")
    criterion.to("cuda")
    optimizer = flow.optim.SGD(
        model.parameters(),
        lr=args.learning_rate,
        momentum=args.mom,
        weight_decay=args.weight_decay,
    )
    lr_scheduler = flow.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

    loss_list = []
    accuracy_list = []
    best_acc = 0.0
    for epoch in range(args.epochs):
        print("***** Running Training *****")
        train_loss = train_one_epoch(args, model, criterion, train_data_loader,
                                     optimizer, epoch, lr_scheduler)
        print("***** Run Validation *****")
        accuracy = valid(args, model, criterion, val_data_loader)

        # save model after each epoch
        print("***** Save Checkpoint *****")
        save_path = os.path.join(args.save_checkpoint_path,
                                 "epoch_%d_val_acc_%f" % (epoch, accuracy))
        save_checkpoint(model, save_path)
        print("Save checkpoint to: ", save_path)

        # save best model
        if best_acc < accuracy:
            save_path = os.path.join(args.save_checkpoint_path, "best_model")
            if os.path.exists(save_path):
                shutil.rmtree(save_path, True)
            save_checkpoint(model, save_path)
            best_acc = accuracy

        loss_list.append(train_loss)
        accuracy_list.append(accuracy)

    print("End Training!")
    print("Max Accuracy: ", best_acc)

    # saving training information
    print("***** Save Logs *****")
    save_logs(loss_list, "eager/losses.txt")
    print("Save loss info to: ", "eager/losses.txt")
    save_logs(accuracy_list, "eager/accuracy.txt")
    print("Save acc info to: ", "eager/accuracy.txt")
def main(args):
    # path setup
    training_results_path = os.path.join(args.results, args.tag)
    os.makedirs(training_results_path, exist_ok=True)

    # build dataloader
    train_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="train",
        dataset_size=9469,
        batch_size=args.train_batch_size,
    )
    val_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="val",
        dataset_size=3925,
        batch_size=args.val_batch_size,
    )

    # oneflow init
    start_t = time.time()
    alexnet_module = alexnet()
    if args.load_checkpoint != "":
        print("load_checkpoint >>>>>>>>> ", args.load_checkpoint)
        alexnet_module.load_state_dict(flow.load(args.load_checkpoint))
    end_t = time.time()
    print("init time : {}".format(end_t - start_t))

    of_cross_entropy = flow.nn.CrossEntropyLoss()

    alexnet_module.to("cuda")
    of_cross_entropy.to("cuda")

    of_sgd = flow.optim.SGD(alexnet_module.parameters(),
                            lr=args.learning_rate,
                            momentum=args.mom)

    class AlexNetGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.alexnet = alexnet_module
            self.cross_entropy = of_cross_entropy
            self.add_optimizer(of_sgd)
            self.train_data_loader = train_data_loader

        def build(self):
            image, label = self.train_data_loader()
            image = image.to("cuda")
            label = label.to("cuda")
            logits = self.alexnet(image)
            loss = self.cross_entropy(logits, label)
            loss.backward()
            return loss

    alexnet_graph = AlexNetGraph()

    class AlexNetEvalGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.alexnet = alexnet_module
            self.val_data_loader = val_data_loader

        def build(self):
            image, label = self.val_data_loader()
            image = image.to("cuda")
            with flow.no_grad():
                logits = self.alexnet(image)
                predictions = logits.softmax()
            return predictions, label

    alexnet_eval_graph = AlexNetEvalGraph()

    of_losses = []
    of_accuracy = []
    all_samples = len(val_data_loader) * args.val_batch_size
    print_interval = 20

    for epoch in range(args.epochs):
        alexnet_module.train()

        for b in range(len(train_data_loader)):
            # oneflow graph train
            start_t = time.time()
            loss = alexnet_graph()
            end_t = time.time()
            if b % print_interval == 0:
                l = loss.numpy()
                of_losses.append(l)
                print(
                    "epoch {} train iter {} oneflow loss {}, train time : {}".
                    format(epoch, b, l, end_t - start_t))

        print("epoch %d train done, start validation" % epoch)

        alexnet_module.eval()
        correct_of = 0.0
        for b in range(len(val_data_loader)):
            start_t = time.time()
            predictions, label = alexnet_eval_graph()
            of_predictions = predictions.numpy()
            clsidxs = np.argmax(of_predictions, axis=1)
            label_nd = label.numpy()
            for i in range(args.val_batch_size):
                if clsidxs[i] == label_nd[i]:
                    correct_of += 1
            end_t = time.time()

        top1 = correct_of / all_samples
        of_accuracy.append(top1)
        print("epoch %d, oneflow top1 val acc: %f" % (epoch, top1))

        flow.save(
            alexnet_module.state_dict(),
            os.path.join(
                args.save_checkpoint_path,
                "epoch_%d_val_acc_%f" % (epoch, correct_of / all_samples),
            ),
        )

    writer = open("graph/losses.txt", "w")
    for o in of_losses:
        writer.write("%f\n" % o)
    writer.close()

    writer = open("graph/accuracy.txt", "w")
    for o in of_accuracy:
        writer.write("%f\n" % o)
    writer.close()
def main():
    # Load the parameters from json file
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)

    # Set the random seed for reproducible experiments
    random.seed(230)
    torch.manual_seed(230)
    np.random.seed(230)
    torch.cuda.manual_seed(230)
    warnings.filterwarnings("ignore")

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")

    # fetch dataloaders, considering full-set vs. sub-set scenarios
    if params.subset_percent < 1.0:
        train_dl = data_loader.fetch_subset_dataloader('train', params)
    else:
        train_dl = data_loader.fetch_dataloader('train', params)
    dev_dl = data_loader.fetch_dataloader('dev', params)
    logging.info("- done.")

    """ Load student and teacher model """
    if "distill" in params.model_version:

        # Specify the student models
        if params.model_version == "cnn_distill":  # 5-layers Plain CNN
            print("Student model: {}".format(params.model_version))
            model = net.Net(params).cuda()
        elif params.model_version == "shufflenet_v2_distill":
            print("Student model: {}".format(params.model_version))
            model = shufflenet.shufflenetv2(class_num=args.num_class).cuda()
        elif params.model_version == "mobilenet_v2_distill":
            print("Student model: {}".format(params.model_version))
            model = mobilenet.mobilenetv2(class_num=args.num_class).cuda()
        elif params.model_version == 'resnet18_distill':
            print("Student model: {}".format(params.model_version))
            model = resnet.ResNet18(num_classes=args.num_class).cuda()
        elif params.model_version == 'resnet50_distill':
            print("Student model: {}".format(params.model_version))
            model = resnet.ResNet50(num_classes=args.num_class).cuda()
        elif params.model_version == "alexnet_distill":
            print("Student model: {}".format(params.model_version))
            model = alexnet.alexnet(num_classes=args.num_class).cuda()
        elif params.model_version == "vgg19_distill":
            print("Student model: {}".format(params.model_version))
            model = models.vgg19_bn(num_classes=args.num_class).cuda()
        elif params.model_version == "googlenet_distill":
            print("Student model: {}".format(params.model_version))
            model = googlenet.GoogleNet(num_class=args.num_class).cuda()
        elif params.model_version == "resnext29_distill":
            print("Student model: {}".format(params.model_version))
            model = resnext.CifarResNeXt(cardinality=8,
                                         depth=29,
                                         num_classes=args.num_class).cuda()
        elif params.model_version == "densenet121_distill":
            print("Student model: {}".format(params.model_version))
            model = densenet.densenet121(num_class=args.num_class).cuda()

        # optimizer
        if params.model_version == "cnn_distill":
            optimizer = optim.Adam(model.parameters(),
                                   lr=params.learning_rate * (params.batch_size / 128))
        else:
            optimizer = optim.SGD(model.parameters(),
                                  lr=params.learning_rate * (params.batch_size / 128),
                                  momentum=0.9,
                                  weight_decay=5e-4)

        iter_per_epoch = len(train_dl)
        warmup_scheduler = utils.WarmUpLR(
            optimizer,
            iter_per_epoch * args.warm)  # warmup the learning rate in the first epoch

        # specify loss function
        if args.self_training:
            print(
                '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>self training>>>>>>>>>>>>>>>>>>>>>>>>>>>>>'
            )
            loss_fn_kd = loss_kd_self
        else:
            loss_fn_kd = loss_kd

        """
        Specify the pre-trained teacher models for knowledge distillation.
        Checkpoints can be obtained by regular training or by downloading our pretrained models.
        For a model pretrained on multiple GPUs, use "nn.DataParallel" to correctly load the model weights.
""" if params.teacher == "resnet18": print("Teacher model: {}".format(params.teacher)) teacher_model = resnet.ResNet18(num_classes=args.num_class) teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet18/best.pth.tar' if args.pt_teacher: # poorly-trained teacher for Defective KD experiments teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet18/0.pth.tar' teacher_model = teacher_model.cuda() elif params.teacher == "alexnet": print("Teacher model: {}".format(params.teacher)) teacher_model = alexnet.alexnet(num_classes=args.num_class) teacher_checkpoint = 'experiments/pretrained_teacher_models/base_alexnet/best.pth.tar' teacher_model = teacher_model.cuda() elif params.teacher == "googlenet": print("Teacher model: {}".format(params.teacher)) teacher_model = googlenet.GoogleNet(num_class=args.num_class) teacher_checkpoint = 'experiments/pretrained_teacher_models/base_googlenet/best.pth.tar' teacher_model = teacher_model.cuda() elif params.teacher == "vgg19": print("Teacher model: {}".format(params.teacher)) teacher_model = models.vgg19_bn(num_classes=args.num_class) teacher_checkpoint = 'experiments/pretrained_teacher_models/base_vgg19/best.pth.tar' teacher_model = teacher_model.cuda() elif params.teacher == "resnet50": print("Teacher model: {}".format(params.teacher)) teacher_model = resnet.ResNet50(num_classes=args.num_class).cuda() teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet50/best.pth.tar' if args.pt_teacher: # poorly-trained teacher for Defective KD experiments teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet50/50.pth.tar' elif params.teacher == "resnet101": print("Teacher model: {}".format(params.teacher)) teacher_model = resnet.ResNet101(num_classes=args.num_class).cuda() teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet101/best.pth.tar' teacher_model = teacher_model.cuda() elif params.teacher == "densenet121": print("Teacher model: {}".format(params.teacher)) teacher_model = densenet.densenet121( num_class=args.num_class).cuda() teacher_checkpoint = 'experiments/pretrained_teacher_models/base_densenet121/best.pth.tar' # teacher_model = nn.DataParallel(teacher_model).cuda() elif params.teacher == "resnext29": print("Teacher model: {}".format(params.teacher)) teacher_model = resnext.CifarResNeXt( cardinality=8, depth=29, num_classes=args.num_class).cuda() teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnext29/best.pth.tar' if args.pt_teacher: # poorly-trained teacher for Defective KD experiments teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnext29/50.pth.tar' teacher_model = nn.DataParallel(teacher_model).cuda() elif params.teacher == "mobilenet_v2": print("Teacher model: {}".format(params.teacher)) teacher_model = mobilenet.mobilenetv2( class_num=args.num_class).cuda() teacher_checkpoint = 'experiments/pretrained_teacher_models/base_mobilenet_v2/best.pth.tar' elif params.teacher == "shufflenet_v2": print("Teacher model: {}".format(params.teacher)) teacher_model = shufflenet.shufflenetv2( class_num=args.num_class).cuda() teacher_checkpoint = 'experiments/pretrained_teacher_models/base_shufflenet_v2/best.pth.tar' utils.load_checkpoint(teacher_checkpoint, teacher_model) # Train the model with KD logging.info("Starting training for {} epoch(s)".format( params.num_epochs)) train_and_evaluate_kd(model, teacher_model, train_dl, dev_dl, optimizer, loss_fn_kd, warmup_scheduler, params, args, args.restore_file) # non-KD mode: regular training to 
    else:
        print("Train base model")
        if params.model_version == "cnn":
            model = net.Net(params).cuda()
        elif params.model_version == "mobilenet_v2":
            print("model: {}".format(params.model_version))
            model = mobilenet.mobilenetv2(class_num=args.num_class).cuda()
        elif params.model_version == "shufflenet_v2":
            print("model: {}".format(params.model_version))
            model = shufflenet.shufflenetv2(class_num=args.num_class).cuda()
        elif params.model_version == "alexnet":
            print("model: {}".format(params.model_version))
            model = alexnet.alexnet(num_classes=args.num_class).cuda()
        elif params.model_version == "vgg19":
            print("model: {}".format(params.model_version))
            model = models.vgg19_bn(num_classes=args.num_class).cuda()
        elif params.model_version == "googlenet":
            print("model: {}".format(params.model_version))
            model = googlenet.GoogleNet(num_class=args.num_class).cuda()
        elif params.model_version == "densenet121":
            print("model: {}".format(params.model_version))
            model = densenet.densenet121(num_class=args.num_class).cuda()
        elif params.model_version == "resnet18":
            model = resnet.ResNet18(num_classes=args.num_class).cuda()
        elif params.model_version == "resnet50":
            model = resnet.ResNet50(num_classes=args.num_class).cuda()
        elif params.model_version == "resnet101":
            model = resnet.ResNet101(num_classes=args.num_class).cuda()
        elif params.model_version == "resnet152":
            model = resnet.ResNet152(num_classes=args.num_class).cuda()
        elif params.model_version == "resnext29":
            model = resnext.CifarResNeXt(cardinality=8,
                                         depth=29,
                                         num_classes=args.num_class).cuda()
        # model = nn.DataParallel(model).cuda()

        if args.regularization:
            print(
                ">>>>>>>>>>>>>>>>>>>>>>>>Loss of Regularization>>>>>>>>>>>>>>>>>>>>>>>>"
            )
            loss_fn = loss_kd_regularization
        elif args.label_smoothing:
            print(
                ">>>>>>>>>>>>>>>>>>>>>>>>Label Smoothing>>>>>>>>>>>>>>>>>>>>>>>>"
            )
            loss_fn = loss_label_smoothing
        else:
            print(
                ">>>>>>>>>>>>>>>>>>>>>>>>Normal Training>>>>>>>>>>>>>>>>>>>>>>>>"
            )
            loss_fn = nn.CrossEntropyLoss()

        if args.double_training:  # double training, compare to self-KD
            print(
                ">>>>>>>>>>>>>>>>>>>>>>>>Double Training>>>>>>>>>>>>>>>>>>>>>>>>"
            )
            checkpoint = 'experiments/pretrained_teacher_models/base_' + str(
                params.model_version) + '/best.pth.tar'
            utils.load_checkpoint(checkpoint, model)

        if params.model_version == "cnn":
            optimizer = optim.Adam(model.parameters(),
                                   lr=params.learning_rate * (params.batch_size / 128))
        else:
            optimizer = optim.SGD(model.parameters(),
                                  lr=params.learning_rate * (params.batch_size / 128),
                                  momentum=0.9,
                                  weight_decay=5e-4)

        iter_per_epoch = len(train_dl)
        warmup_scheduler = utils.WarmUpLR(optimizer, iter_per_epoch * args.warm)

        # Train the model
        logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
        train_and_evaluate(model, train_dl, dev_dl, optimizer, loss_fn, params,
                           args.model_dir, warmup_scheduler, args, args.restore_file)