Example #1
import time

import numpy as np
import oneflow as flow

# alexnet, load_image, and clsidx_2_labels are assumed to come from the
# surrounding project (model definition, image preprocessing, and label map).


def main(args):
    start_t = time.time()
    alexnet_module = alexnet()
    end_t = time.time()
    print("init time : {}".format(end_t - start_t))

    start_t = time.time()
    pretrain_models = flow.load(args.model_path)
    alexnet_module.load_state_dict(pretrain_models)
    end_t = time.time()
    print("load params time : {}".format(end_t - start_t))

    alexnet_module.eval()
    alexnet_module.to("cuda")

    start_t = time.time()
    image = load_image(args.image_path)
    image = flow.Tensor(image, device=flow.device("cuda"))
    predictions = alexnet_module(image).softmax()
    predictions = predictions.numpy()
    end_t = time.time()
    print("infer time : {}".format(end_t - start_t))
    clsidx = np.argmax(predictions)
    print("predict prob: %f, class name: %s" %
          (np.max(predictions), clsidx_2_labels[clsidx]))
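A hedged sketch of a command-line driver for main above; the flag names are inferred from the attributes read on args, so treat them as assumptions rather than the project's actual interface.

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_path", required=True,
                        help="checkpoint directory saved with flow.save")
    parser.add_argument("--image_path", required=True,
                        help="image to classify")
    main(parser.parse_args())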
Example #2
def get_architecture(arch: str) -> torch.nn.Module:
    """ Return a neural network (with random weights)
    :param arch: the architecture - should be in the ARCHITECTURES list above
    :return: a PyTorch module
    """
    if arch == 'lenet':
        model = lenet()
    elif arch == 'alexnet':
        model = alexnet()
    elif arch == 'resnet20':
        model = resnet20()
    elif arch == 'resnet26':
        model = resnet26()
    elif arch == 'resnet32':
        model = resnet32()
    elif arch == 'resnet110':
        model = resnet110()
    elif arch == 'densenet':
        model = densenet_BC_cifar(depth=100, k=12)
    elif arch == 'vgg16':
        model = vgg16()
    elif arch == 'vgg19':
        model = vgg19()
    else:
        raise ValueError("arch '{}' not in ARCHITECTURES".format(arch))
    return model
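A minimal usage sketch; it assumes the factory functions above are importable and that the models take CIFAR-style 32x32 RGB inputs, which is an assumption about this project.

import torch

model = get_architecture('resnet20')
x = torch.randn(8, 3, 32, 32)  # dummy batch: 8 CIFAR-sized images
logits = model(x)
print(logits.shape)            # e.g. torch.Size([8, 10]) for 10 classes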
Example #3
def build_model(model_type=0):
    # Build the model; `model_type` is accepted but unused in this snippet.
    # `cfg` and `myloss` come from the surrounding project.
    model = alexnet(num_classes=cfg.class_number)
    model.cuda()
    criterion = myloss().cuda()
    # Alternative criteria, kept for reference:
    # criterion = nn.NLLLoss().cuda()
    # criterion = nn.CrossEntropyLoss().cuda()
    return model, criterion
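A sketch of how build_model might be consumed in one training step; train_loader is a placeholder DataLoader and the optimizer settings are illustrative, not taken from the project.

import torch

model, criterion = build_model()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
for images, labels in train_loader:  # placeholder DataLoader
    images, labels = images.cuda(), labels.cuda()
    optimizer.zero_grad()
    loss = criterion(model(images), labels)
    loss.backward()
    optimizer.step()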
Example #4
# Imports as in Example #1 (time, numpy as np, oneflow as flow, plus the
# project's alexnet, load_image, and clsidx_2_labels helpers).
def main(args):
    start_t = time.time()
    alexnet_module = alexnet()
    end_t = time.time()
    print("init time : {}".format(end_t - start_t))

    start_t = time.time()
    pretrain_models = flow.load(args.model_path)
    alexnet_module.load_state_dict(pretrain_models)
    end_t = time.time()
    print("load params time : {}".format(end_t - start_t))

    alexnet_module.eval()
    alexnet_module.to("cuda")

    class AlexNetEvalGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.alexnet = alexnet_module

        def build(self, image):
            with flow.no_grad():
                predictions = self.alexnet(image)
            return predictions

    alexnet_eval_graph = AlexNetEvalGraph()

    start_t = time.time()
    image = load_image(args.image_path)
    image = flow.Tensor(image, device=flow.device("cuda"))
    predictions = alexnet_eval_graph(image).softmax()
    predictions = predictions.numpy()
    end_t = time.time()
    print("infer time : {}".format(end_t - start_t))
    clsidx = np.argmax(predictions)
    print("predict prob: %f, class name: %s" %
          (np.max(predictions), clsidx_2_labels[clsidx]))
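The only difference from Example #1 is the flow.nn.Graph wrapper: build() is compiled on the first call and the compiled plan is reused afterwards, which is why the same module can serve both eager and graph execution. The pattern in isolation, as a minimal sketch:

import oneflow as flow

class EvalGraph(flow.nn.Graph):
    def __init__(self, module):
        super().__init__()
        self.module = module

    def build(self, x):
        with flow.no_grad():
            return self.module(x)

# usage: graph = EvalGraph(my_module); y = graph(x)
# the first call compiles the graph, later calls reuse it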
Example #5
from profiling import Profiling
# from profiling import record
import model.alexnet as alexnet

import torch
import torch.nn as nn

# Number of iterations to profile
num_iters = 3

#
# Create model
#
model = alexnet.alexnet()

#
# Use case 1: use it as context-manager
#

# profiler will measure the following 3 iterations.
with Profiling(model) as p:
    for i in range(num_iters):
        # Forward (call the module itself so any profiling hooks fire):
        output = model(torch.ones(2, 3, 224, 224, requires_grad=True))

        # Backward (gradient shape matches the (batch, num_classes) output):
        grads = torch.ones(2, 1000)
        output.backward(grads)
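The Profiling class comes from the project's profiling module and is not shown here. Purely to illustrate the context-manager pattern, a hypothetical minimal stand-in could time each forward pass with module hooks; this is an assumption, not the project's actual implementation.

import time
import torch.nn as nn

class TimingProfiler:  # hypothetical stand-in for profiling.Profiling
    def __init__(self, model: nn.Module):
        self.model = model
        self.forward_times = []
        self._start = None

    def __enter__(self):
        self._pre = self.model.register_forward_pre_hook(
            lambda m, inp: setattr(self, "_start", time.time()))
        self._post = self.model.register_forward_hook(
            lambda m, inp, out: self.forward_times.append(
                time.time() - self._start))
        return self

    def __exit__(self, *exc):
        self._pre.remove()
        self._post.remove()

This only times the top-level module's forward; it relies on hooks, which fire when the module is called as model(...) rather than model.forward(...).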
Example #6
device = torch.device("cuda" if (
    args.use_cuda and torch.cuda.is_available()) else "cpu")
torch.cuda.empty_cache()

####### Pick the corresponding model
if args.model_name == 'res50':
    model = resnet.resnet50().to(device)
elif args.model_name == 'res101':
    model = resnet.resnet101().to(device)
elif args.model_name == 'res152':
    model = resnet.resnet152().to(device)
elif args.model_name == 'res34':
    model = resnet.resnet34().to(device)
elif args.model_name == 'res18':
    model = resnet.resnet18().to(device)
elif args.model_name == 'alexnet':
    model = alexnet.alexnet().to(device)
else:
    raise ValueError('Unknown model name: {}'.format(args.model_name))

########### Whether to parallelize the model
if args.use_cuda:
    if args.parallel:
        model = nn.DataParallel(model)
    else:
        # Note: CUDA_VISIBLE_DEVICES only takes effect if set before CUDA is
        # initialized; after model.to(device) above it is already too late.
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_num

############## Whether to load a pretrained model
#if args.train_from_scratch:
#    pass
#else:
#    model.load_state_dict(torch.load(args.pretrained_model_path))
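A hedged sketch of the argument parser this snippet appears to expect; the flag names are inferred from the attributes used above, so treat them as assumptions.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--model_name', default='res50',
                    choices=['res18', 'res34', 'res50',
                             'res101', 'res152', 'alexnet'])
parser.add_argument('--use_cuda', action='store_true')
parser.add_argument('--parallel', action='store_true')
parser.add_argument('--gpu_num', default='0')  # value for CUDA_VISIBLE_DEVICES
args = parser.parse_args()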
Example #7
def setup(args):
    train_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="train",
        dataset_size=9469,
        batch_size=args.train_batch_size,
    )

    val_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="val",
        dataset_size=3925,
        batch_size=args.val_batch_size,
    )

    # model setup
    eager_model = alexnet()
    graph_model = alexnet()
    graph_model.load_state_dict(eager_model.state_dict())

    eager_model.to("cuda")
    graph_model.to("cuda")
    # optimizer setup
    eager_optimizer = flow.optim.SGD(eager_model.parameters(),
                                     lr=args.learning_rate,
                                     momentum=args.mom)
    graph_optimizer = flow.optim.SGD(graph_model.parameters(),
                                     lr=args.learning_rate,
                                     momentum=args.mom)

    # criterion setup
    criterion = flow.nn.CrossEntropyLoss()
    criterion = criterion.to("cuda")

    class ModelTrainGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.graph_model = graph_model
            self.criterion = criterion
            self.add_optimizer(graph_optimizer)

        def build(self, image, label):
            logits = self.graph_model(image)
            loss = self.criterion(logits, label)
            loss.backward()
            return loss

    class ModelEvalGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.graph_model = graph_model

        def build(self, image):
            with flow.no_grad():
                logits = self.graph_model(image)
                predictions = logits.softmax()
            return predictions

    model_train_graph = ModelTrainGraph()
    model_eval_graph = ModelEvalGraph()

    dic = {
        "train_dataloader": train_data_loader,
        "val_dataloader": val_data_loader,
        "eager": [eager_model, eager_optimizer, criterion],
        "graph": [graph_model, model_train_graph, model_eval_graph],
    }

    return dic
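A sketch of how the returned dictionary might be unpacked by a caller; the keys follow the dict constructed above.

state = setup(args)
train_loader = state["train_dataloader"]
val_loader = state["val_dataloader"]
eager_model, eager_optimizer, criterion = state["eager"]
graph_model, train_graph, eval_graph = state["graph"]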
Example #8
def main(args):
    # Data Setup
    train_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="train",
        dataset_size=9469,
        batch_size=args.train_batch_size,
    )

    val_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="val",
        dataset_size=3925,
        batch_size=args.val_batch_size,
    )

    # Model Setup
    print("***** Initialization *****")
    start_t = time.time()
    model = alexnet()
    if args.load_checkpoint != "":
        print("load_checkpoint >>>>>>>>> ", args.load_checkpoint)
        model.load_state_dict(flow.load(args.load_checkpoint))
    end_t = time.time()
    print("init time : {}".format(end_t - start_t))

    # Training Setup
    criterion = flow.nn.CrossEntropyLoss()
    model.to("cuda")
    criterion.to("cuda")
    optimizer = flow.optim.SGD(
        model.parameters(),
        lr=args.learning_rate,
        momentum=args.mom,
        weight_decay=args.weight_decay,
    )
    lr_scheduler = flow.optim.lr_scheduler.StepLR(optimizer,
                                                  step_size=30,
                                                  gamma=0.1)

    loss_list = []
    accuracy_list = []
    best_acc = 0.0
    for epoch in range(args.epochs):
        print("***** Runing Training *****")
        train_loss = train_one_epoch(args, model, criterion, train_data_loader,
                                     optimizer, epoch, lr_scheduler)
        print("***** Run Validation *****")
        accuracy = valid(args, model, criterion, val_data_loader)

        # save model after each epoch
        print("***** Save Checkpoint *****")
        save_path = os.path.join(args.save_checkpoint_path,
                                 "epoch_%d_val_acc_%f" % (epoch, accuracy))
        save_checkpoint(model, save_path)
        print("Save checkpoint to: ", save_path)

        # save best model
        if best_acc < accuracy:
            save_path = os.path.join(args.save_checkpoint_path, "best_model")
            if os.path.exists(save_path):
                shutil.rmtree(save_path, ignore_errors=True)
            save_checkpoint(model, save_path)
            best_acc = accuracy

        loss_list.append(train_loss)
        accuracy_list.append(accuracy)
    print("End Training!")
    print("Max Accuracy: ", best_acc)

    # saving training information
    print("***** Save Logs *****")
    save_logs(loss_list, "eager/losses.txt")
    print("Save loss info to: ", "eager/losses.txt")
    save_logs(accuracy_list, "eager/accuracy.txt")
    print("Save acc info to: ", "eager/accuracy.txt")
Example #9
def main(args):
    # path setup
    training_results_path = os.path.join(args.results, args.tag)
    os.makedirs(training_results_path, exist_ok=True)

    # build dataloader
    train_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="train",
        dataset_size=9469,
        batch_size=args.train_batch_size,
    )

    val_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="val",
        dataset_size=3925,
        batch_size=args.val_batch_size,
    )

    # oneflow init
    start_t = time.time()
    alexnet_module = alexnet()
    if args.load_checkpoint != "":
        print("load_checkpoint >>>>>>>>> ", args.load_checkpoint)
        alexnet_module.load_state_dict(flow.load(args.load_checkpoint))

    end_t = time.time()
    print("init time : {}".format(end_t - start_t))

    of_cross_entropy = flow.nn.CrossEntropyLoss()

    alexnet_module.to("cuda")
    of_cross_entropy.to("cuda")

    of_sgd = flow.optim.SGD(alexnet_module.parameters(),
                            lr=args.learning_rate,
                            momentum=args.mom)

    class AlexNetGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.alexnet = alexnet_module
            self.cross_entropy = of_cross_entropy
            self.add_optimizer(of_sgd)
            self.train_data_loader = train_data_loader

        def build(self):
            image, label = self.train_data_loader()
            image = image.to("cuda")
            label = label.to("cuda")
            logits = self.alexnet(image)
            loss = self.cross_entropy(logits, label)
            loss.backward()
            return loss

    alexnet_graph = AlexNetGraph()

    class AlexNetEvalGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.alexnet = alexnet_module
            self.val_data_loader = val_data_loader

        def build(self):
            image, label = self.val_data_loader()
            image = image.to("cuda")
            with flow.no_grad():
                logits = self.alexnet(image)
                predictions = logits.softmax()
            return predictions, label

    alexnet_eval_graph = AlexNetEvalGraph()

    of_losses = []
    of_accuracy = []
    all_samples = len(val_data_loader) * args.val_batch_size
    print_interval = 20

    for epoch in range(args.epochs):
        alexnet_module.train()

        for b in range(len(train_data_loader)):
            # oneflow graph train
            start_t = time.time()
            loss = alexnet_graph()
            end_t = time.time()
            if b % print_interval == 0:
                loss_np = loss.numpy()
                of_losses.append(loss_np)
                print(
                    "epoch {} train iter {} oneflow loss {}, train time : {}".
                    format(epoch, b, loss_np, end_t - start_t))

        print("epoch %d train done, start validation" % epoch)

        alexnet_module.eval()
        correct_of = 0.0
        for b in range(len(val_data_loader)):
            start_t = time.time()
            predictions, label = alexnet_eval_graph()
            of_predictions = predictions.numpy()
            clsidxs = np.argmax(of_predictions, axis=1)

            label_nd = label.numpy()
            for i in range(args.val_batch_size):
                if clsidxs[i] == label_nd[i]:
                    correct_of += 1
            end_t = time.time()

        top1 = correct_of / all_samples
        of_accuracy.append(top1)
        print("epoch %d, oneflow top1 val acc: %f" % (epoch, top1))

        flow.save(
            alexnet_module.state_dict(),
            os.path.join(
                args.save_checkpoint_path,
                "epoch_%d_val_acc_%f" % (epoch, top1),
            ),
        )

    writer = open("graph/losses.txt", "w")
    for o in of_losses:
        writer.write("%f\n" % o)
    writer.close()

    writer = open("graph/accuracy.txt", "w")
    for o in of_accuracy:
        writer.write("%f\n" % o)
    writer.close()
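With a helper like the save_logs sketch shown after Example #8, the two writer blocks above collapse to:

save_logs(of_losses, "graph/losses.txt")
save_logs(of_accuracy, "graph/accuracy.txt")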
Example #10
def main():
    # Load the parameters from json file
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)

    # Set the random seed for reproducible experiments
    random.seed(230)
    torch.manual_seed(230)
    np.random.seed(230)
    torch.cuda.manual_seed(230)
    warnings.filterwarnings("ignore")

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")

    # fetch dataloaders, considering full-set vs. sub-set scenarios
    if params.subset_percent < 1.0:
        train_dl = data_loader.fetch_subset_dataloader('train', params)
    else:
        train_dl = data_loader.fetch_dataloader('train', params)

    dev_dl = data_loader.fetch_dataloader('dev', params)

    logging.info("- done.")
    """
    Load student and teacher model
    """
    if "distill" in params.model_version:

        # Specify the student models
        if params.model_version == "cnn_distill":  # 5-layers Plain CNN
            print("Student model: {}".format(params.model_version))
            model = net.Net(params).cuda()

        elif params.model_version == "shufflenet_v2_distill":
            print("Student model: {}".format(params.model_version))
            model = shufflenet.shufflenetv2(class_num=args.num_class).cuda()

        elif params.model_version == "mobilenet_v2_distill":
            print("Student model: {}".format(params.model_version))
            model = mobilenet.mobilenetv2(class_num=args.num_class).cuda()

        elif params.model_version == 'resnet18_distill':
            print("Student model: {}".format(params.model_version))
            model = resnet.ResNet18(num_classes=args.num_class).cuda()

        elif params.model_version == 'resnet50_distill':
            print("Student model: {}".format(params.model_version))
            model = resnet.ResNet50(num_classes=args.num_class).cuda()

        elif params.model_version == "alexnet_distill":
            print("Student model: {}".format(params.model_version))
            model = alexnet.alexnet(num_classes=args.num_class).cuda()

        elif params.model_version == "vgg19_distill":
            print("Student model: {}".format(params.model_version))
            model = models.vgg19_bn(num_classes=args.num_class).cuda()

        elif params.model_version == "googlenet_distill":
            print("Student model: {}".format(params.model_version))
            model = googlenet.GoogleNet(num_class=args.num_class).cuda()

        elif params.model_version == "resnext29_distill":
            print("Student model: {}".format(params.model_version))
            model = resnext.CifarResNeXt(cardinality=8,
                                         depth=29,
                                         num_classes=args.num_class).cuda()

        elif params.model_version == "densenet121_distill":
            print("Student model: {}".format(params.model_version))
            model = densenet.densenet121(num_class=args.num_class).cuda()

        # optimizer
        if params.model_version == "cnn_distill":
            optimizer = optim.Adam(model.parameters(),
                                   lr=params.learning_rate *
                                   (params.batch_size / 128))
        else:
            optimizer = optim.SGD(model.parameters(),
                                  lr=params.learning_rate *
                                  (params.batch_size / 128),
                                  momentum=0.9,
                                  weight_decay=5e-4)

        iter_per_epoch = len(train_dl)
        warmup_scheduler = utils.WarmUpLR(
            optimizer, iter_per_epoch *
            args.warm)  # warmup the learning rate in the first epoch

        # specify loss function
        if args.self_training:
            print(
                '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>self training>>>>>>>>>>>>>>>>>>>>>>>>>>>>>'
            )
            loss_fn_kd = loss_kd_self
        else:
            loss_fn_kd = loss_kd
        """ 
            Specify the pre-trained teacher models for knowledge distillation
            Checkpoints can be obtained by regular training or downloading our pretrained models
            For model which is pretrained in multi-GPU, use "nn.DaraParallel" to correctly load the model weights.
        """
        if params.teacher == "resnet18":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = resnet.ResNet18(num_classes=args.num_class)
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet18/best.pth.tar'
            if args.pt_teacher:  # poorly-trained teacher for Defective KD experiments
                teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet18/0.pth.tar'
            teacher_model = teacher_model.cuda()

        elif params.teacher == "alexnet":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = alexnet.alexnet(num_classes=args.num_class)
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_alexnet/best.pth.tar'
            teacher_model = teacher_model.cuda()

        elif params.teacher == "googlenet":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = googlenet.GoogleNet(num_class=args.num_class)
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_googlenet/best.pth.tar'
            teacher_model = teacher_model.cuda()

        elif params.teacher == "vgg19":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = models.vgg19_bn(num_classes=args.num_class)
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_vgg19/best.pth.tar'
            teacher_model = teacher_model.cuda()

        elif params.teacher == "resnet50":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = resnet.ResNet50(num_classes=args.num_class).cuda()
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet50/best.pth.tar'
            if args.pt_teacher:  # poorly-trained teacher for Defective KD experiments
                teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet50/50.pth.tar'

        elif params.teacher == "resnet101":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = resnet.ResNet101(num_classes=args.num_class).cuda()
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet101/best.pth.tar'
            teacher_model = teacher_model.cuda()

        elif params.teacher == "densenet121":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = densenet.densenet121(
                num_class=args.num_class).cuda()
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_densenet121/best.pth.tar'
            # teacher_model = nn.DataParallel(teacher_model).cuda()

        elif params.teacher == "resnext29":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = resnext.CifarResNeXt(
                cardinality=8, depth=29, num_classes=args.num_class).cuda()
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnext29/best.pth.tar'
            if args.pt_teacher:  # poorly-trained teacher for Defective KD experiments
                teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnext29/50.pth.tar'
                teacher_model = nn.DataParallel(teacher_model).cuda()

        elif params.teacher == "mobilenet_v2":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = mobilenet.mobilenetv2(
                class_num=args.num_class).cuda()
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_mobilenet_v2/best.pth.tar'

        elif params.teacher == "shufflenet_v2":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = shufflenet.shufflenetv2(
                class_num=args.num_class).cuda()
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_shufflenet_v2/best.pth.tar'

        utils.load_checkpoint(teacher_checkpoint, teacher_model)

        # Train the model with KD
        logging.info("Starting training for {} epoch(s)".format(
            params.num_epochs))
        train_and_evaluate_kd(model, teacher_model, train_dl, dev_dl,
                              optimizer, loss_fn_kd, warmup_scheduler, params,
                              args, args.restore_file)

    # non-KD mode: regular training to obtain a baseline model
    else:
        print("Train base model")
        if params.model_version == "cnn":
            model = net.Net(params).cuda()

        elif params.model_version == "mobilenet_v2":
            print("model: {}".format(params.model_version))
            model = mobilenet.mobilenetv2(class_num=args.num_class).cuda()

        elif params.model_version == "shufflenet_v2":
            print("model: {}".format(params.model_version))
            model = shufflenet.shufflenetv2(class_num=args.num_class).cuda()

        elif params.model_version == "alexnet":
            print("model: {}".format(params.model_version))
            model = alexnet.alexnet(num_classes=args.num_class).cuda()

        elif params.model_version == "vgg19":
            print("model: {}".format(params.model_version))
            model = models.vgg19_bn(num_classes=args.num_class).cuda()

        elif params.model_version == "googlenet":
            print("model: {}".format(params.model_version))
            model = googlenet.GoogleNet(num_class=args.num_class).cuda()

        elif params.model_version == "densenet121":
            print("model: {}".format(params.model_version))
            model = densenet.densenet121(num_class=args.num_class).cuda()

        elif params.model_version == "resnet18":
            model = resnet.ResNet18(num_classes=args.num_class).cuda()

        elif params.model_version == "resnet50":
            model = resnet.ResNet50(num_classes=args.num_class).cuda()

        elif params.model_version == "resnet101":
            model = resnet.ResNet101(num_classes=args.num_class).cuda()

        elif params.model_version == "resnet152":
            model = resnet.ResNet152(num_classes=args.num_class).cuda()

        elif params.model_version == "resnext29":
            model = resnext.CifarResNeXt(cardinality=8,
                                         depth=29,
                                         num_classes=args.num_class).cuda()
            # model = nn.DataParallel(model).cuda()

        if args.regularization:
            print(
                ">>>>>>>>>>>>>>>>>>>>>>>>Loss of Regularization>>>>>>>>>>>>>>>>>>>>>>>>"
            )
            loss_fn = loss_kd_regularization
        elif args.label_smoothing:
            print(
                ">>>>>>>>>>>>>>>>>>>>>>>>Label Smoothing>>>>>>>>>>>>>>>>>>>>>>>>"
            )
            loss_fn = loss_label_smoothing
        else:
            print(
                ">>>>>>>>>>>>>>>>>>>>>>>>Normal Training>>>>>>>>>>>>>>>>>>>>>>>>"
            )
            loss_fn = nn.CrossEntropyLoss()
            if args.double_training:  # double training, compare to self-KD
                print(
                    ">>>>>>>>>>>>>>>>>>>>>>>>Double Training>>>>>>>>>>>>>>>>>>>>>>>>"
                )
                checkpoint = 'experiments/pretrained_teacher_models/base_' + str(
                    params.model_version) + '/best.pth.tar'
                utils.load_checkpoint(checkpoint, model)

        if params.model_version == "cnn":
            optimizer = optim.Adam(model.parameters(),
                                   lr=params.learning_rate *
                                   (params.batch_size / 128))
        else:
            optimizer = optim.SGD(model.parameters(),
                                  lr=params.learning_rate *
                                  (params.batch_size / 128),
                                  momentum=0.9,
                                  weight_decay=5e-4)

        iter_per_epoch = len(train_dl)
        warmup_scheduler = utils.WarmUpLR(optimizer,
                                          iter_per_epoch * args.warm)

        # Train the model
        logging.info("Starting training for {} epoch(s)".format(
            params.num_epochs))
        train_and_evaluate(model, train_dl, dev_dl, optimizer, loss_fn, params,
                           args.model_dir, warmup_scheduler, args,
                           args.restore_file)
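The run is driven by params.json under --model_dir; a hedged example of what it appears to contain, based only on the fields read above (field names come from the snippet, values are purely illustrative).

import json
import os

example_params = {  # field names read above; values illustrative only
    "model_version": "resnet18_distill",
    "teacher": "resnet50",
    "subset_percent": 1.0,
    "learning_rate": 0.1,
    "batch_size": 128,
    "num_epochs": 200,
}
os.makedirs("experiments/example", exist_ok=True)
with open("experiments/example/params.json", "w") as f:
    json.dump(example_params, f, indent=4)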