Пример #1
0
    def load_tensorflow(self, model_file, serialization_mode, init_args):
        """Load a TensorFlow SavedModel and re-save it via builtin_models.

        The SavedModel is loaded into a dedicated graph/session, the inputs
        and outputs of the requested signature are looked up, and the result
        is written to ``self.out_model_path``.

        Args:
            model_file: Path to the SavedModel folder, or to a file inside
                it (the file's parent folder is then used).
            serialization_mode: Not used by this loader.
            init_args: Raw arguments handed to
                ``self.parse_tensorflow_init_args``, which yields the graph
                tags and the signature-def key.
        """
        import tensorflow as tf
        from builtin_models.tensorflow import save_model

        if os.path.isfile(model_file):
            model_folder = os.path.dirname(os.path.abspath(model_file))
        else:
            model_folder = model_file
        print(f'IN_MODEL_PATH = {model_folder}')
        print(f'IN_MODLE_FILES = {os.listdir(model_folder)}')

        tf_graph = tf.Graph()
        session = tf.Session(graph=tf_graph)
        try:
            graph_tags, signature_def_key = self.parse_tensorflow_init_args(
                init_args)
            print(
                f'tags = {graph_tags}, def_key = {signature_def_key}, model_folder = {model_folder}'
            )
            graph_def = tf.saved_model.load(session, graph_tags, model_folder)
            print(f'graph_def = {graph_def}')
            signature_def = graph_def.signature_def[signature_def_key]
            save_model(session,
                       signature_def.inputs,
                       signature_def.outputs,
                       path=self.out_model_path)
        finally:
            # Release the session's resources even when loading or saving
            # fails; the session is not used after this point.
            session.close()
Пример #2
0
    def load_keras(self, model_file, serialization_mode):
        """Read a Keras model from ``model_file`` and re-save it.

        The loaded model is written to ``self.out_model_path`` through
        ``builtin_models.keras.save_model``.  ``serialization_mode`` is not
        used by this method.
        """
        import keras
        from keras.models import load_model as read_keras_model
        from builtin_models.keras import save_model as export_model

        export_model(read_keras_model(model_file), self.out_model_path)
Пример #3
0
 def load_sklearn(self, model_file, serialization_mode):
     """Load a scikit-learn model from disk and re-save it via builtin_models.

     The file is read with ``joblib`` first; if that fails for any reason
     it is read again with plain ``pickle``.  The loaded model is written
     to ``self.out_model_path``.

     Args:
         model_file: Path of the serialized model.
         serialization_mode: Not used by this loader.

     Raises:
         Exception: Whatever ``pickle.load`` or ``open`` raises when the
             fallback read fails as well.
     """
     import pickle
     import joblib
     from builtin_models.sklearn import save_model

     # NOTE(security): joblib and pickle both execute arbitrary code while
     # deserializing; only load model files from a trusted source.
     try:
         model = joblib.load(model_file)
     except Exception:
         # joblib could not read the file; retry as a plain pickle.
         with open(model_file, 'rb') as fp:
             model = pickle.load(fp)
     save_model(model, self.out_model_path)
Пример #4
0
def run_pipeline(action, model_path):
    """Train ``MnistNet`` on MNIST and save the result to ``model_path``.

    The training split is downloaded into ``./data`` when missing.  A test
    loader is constructed as well but is not consumed here.  ``action`` is
    not used by this function.
    """
    # Hyper-parameters.
    pixels_per_image = 784  # a 28x28 image flattened to one vector
    hidden_units = 500  # width of the hidden layer
    class_count = 10  # digits 0..9
    epoch_count = 5  # full passes over the training split
    samples_per_batch = 64  # samples fed per optimisation step
    step_size = 1e-3  # optimiser learning rate

    train_data = dsets.MNIST(root='./data',
                             train=True,
                             transform=transforms.ToTensor(),
                             download=True)
    test_data = dsets.MNIST(root='./data',
                            train=False,
                            transform=transforms.ToTensor())

    train_batches = torch.utils.data.DataLoader(dataset=train_data,
                                                batch_size=samples_per_batch,
                                                shuffle=True)
    test_batches = torch.utils.data.DataLoader(dataset=test_data,
                                               batch_size=samples_per_batch,
                                               shuffle=False)

    network = MnistNet(pixels_per_image, hidden_units, class_count)
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    print(f'DEVICE={device}')
    print(f'os.environ={os.environ}')
    network = network.to(device)

    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(network.parameters(), lr=step_size)

    # Denominator shown in the progress line (floor division, as logged).
    steps_per_epoch = len(train_data) // samples_per_batch

    for epoch_no in range(1, epoch_count + 1):
        for step_no, (batch_images, batch_labels) in enumerate(train_batches,
                                                               start=1):
            flat_images = Variable(batch_images.view(-1, 28 * 28)).to(device)
            targets = Variable(batch_labels).to(device)

            optimizer.zero_grad()
            batch_loss = loss_of = criterion(network(flat_images), targets)
            batch_loss.backward()
            optimizer.step()

            # Report progress every 100 steps.
            if step_no % 100 == 0:
                print('Epoch [%d/%d], Step [%d/%d]' %
                      (epoch_no, epoch_count, step_no, steps_per_epoch))

    save_model(network, model_path, conda_env=None)
    print("save_model Done")
Пример #5
0
def load_pytorch(model_file, serialization, out_model_path):
    """Re-save a cloudpickle-serialized PyTorch model via builtin_models.

    Only ``serialization == 'cloudpickle'`` does any work: the model is
    unpickled from ``model_file`` and written to ``out_model_path``.  For
    ``'savedmodel'`` and every other value the function returns without
    doing anything.
    """
    from builtin_models.pytorch import save_model

    if serialization != 'cloudpickle':
        # 'savedmodel' and all remaining modes are currently no-ops.
        return

    print(f'model loading(cloudpickle): {model_file} to {out_model_path}')
    import cloudpickle
    with open(model_file, 'rb') as model_stream:
        restored = cloudpickle.load(model_stream)
    save_model(restored, out_model_path)
    print(f'model loaded: {out_model_path}')
Пример #6
0
def entrance(model_path='pretrained',
             data_path='',
             save_path='saved_model',
             label_map_path='saved_path',
             model_type='densenet201',
             pretrained=True,
             memory_efficient=False,
             num_classes=2,
             epochs=100,
             batch_size=16,
             learning_rate=0.001,
             random_seed=231,
             patience=2):
    """Train a ``MyDenseNet`` on an image folder and save the outcome.

    The images under ``data_path`` are read twice (once per transform set
    returned by ``get_transform``).  A random permutation of the samples is
    split so that the last tenth (floor division) becomes the validation
    subset and the rest the training subset.  After ``train`` returns, the
    model is saved to ``save_path`` with no extra dependencies and
    ``index_to_label.json`` is copied from ``data_path`` into
    ``label_map_path``.
    """
    train_tf, eval_tf = get_transform()
    augmented_images = datasets.ImageFolder(data_path, transform=train_tf)
    plain_images = datasets.ImageFolder(data_path, transform=eval_tf)

    # Random train/validation split over one shared permutation.
    shuffled = torch.randperm(len(augmented_images))
    holdout = len(augmented_images) // 10
    cut = len(shuffled) - holdout
    train_part = torch.utils.data.Subset(augmented_images, shuffled[:cut])
    valid_part = torch.utils.data.Subset(plain_images, shuffled[cut:])

    net = MyDenseNet(model_type=model_type,
                     model_path=model_path,
                     pretrained=pretrained,
                     memory_efficient=memory_efficient,
                     classes=num_classes)

    os.makedirs(save_path, exist_ok=True)
    train(model=net,
          train_set=train_part,
          valid_set=valid_part,
          save_path=save_path,
          epochs=epochs,
          batch_size=batch_size,
          lr=learning_rate,
          random_seed=random_seed,
          patience=patience)

    # The model is saved without bundling any dependency files.
    save_model(net, save_path, dependencies=[])

    # Ship the label map next to the saved model.
    label_file = 'index_to_label.json'
    os.makedirs(label_map_path, exist_ok=True)
    copyfile(os.path.join(data_path, label_file),
             os.path.join(label_map_path, label_file))
    logger.info('This experiment has been completed.')
Пример #7
0
def load_keras(model_file, serialization, out_model_path):
    """Load a Keras model from a local file and re-save it via builtin_models.

    Args:
        model_file: Path of the serialized Keras model.
        serialization: Not used by this loader.
        out_model_path: Destination passed to ``save_model``.  When it is
            empty or ``None`` the previous hard-coded location ``'./model'``
            is used instead.
    """
    from builtin_models.keras import save_model, load_model_from_local_file
    model = load_model_from_local_file(model_file)
    # Write where the caller asked; './model' stays as the fallback so that
    # callers that pass no destination keep the old behaviour.
    path = out_model_path or './model'
    save_model(model, path)
Пример #8
0
def _save_state_dict(model, save_path):
    """Write the model's state dict to ``<save_path>/model.pth``.

    With more than one CUDA device ``train`` wraps the model in
    ``DataParallel``, so the wrapped ``module`` is saved in that case.
    """
    if torch.cuda.device_count() > 1:
        state = model.module.state_dict()
    else:
        state = model.state_dict()
    torch.save(state, os.path.join(save_path, 'model.pth'))


def train(model,
          train_set,
          valid_set,
          test_set,
          save_path,
          num_classes,
          epochs,
          batch_size,
          lr=0.001,
          wd=0.0001,
          momentum=0.9,
          random_seed=None,
          model_type='densenet201',
          memory_efficient=False,
          label_list=None,
          dependencies=None):
    """Train ``model`` with SGD, checkpoint it, and report the test error.

    Per epoch the function trains on ``train_set``, evaluates on
    ``valid_set`` (or on ``test_set`` when no validation loader is
    available), writes ``model.pth`` into ``save_path``, exports the model
    with ``builtin_models.pytorch.save_model`` and appends a row to
    ``results.csv``.  With a validation loader the checkpoint is only
    overwritten when the validation error improves; otherwise it is
    overwritten every epoch.  Afterwards a fresh ``MyDenseNet`` is built,
    the checkpoint is loaded into it and the final test error is appended
    to ``results.csv`` and printed.

    Args:
        model: Network to train.
        train_set: Training dataset.
        valid_set: Validation dataset, or ``None``.
        test_set: Test dataset.
        save_path: Existing folder receiving ``results.csv``, ``model.pth``
            and the exported model.
        num_classes: Class count used to rebuild the network for the final
            evaluation.
        epochs: Number of training epochs.
        batch_size: Batch size for all loaders.
        lr: Initial SGD learning rate.
        wd: SGD weight decay.
        momentum: SGD (Nesterov) momentum.
        random_seed: Optional seed; applied to the CUDA generators when
            CUDA is available, otherwise to the CPU generator.
        model_type: Architecture name used to rebuild the network.
        memory_efficient: Forwarded to ``MyDenseNet`` on rebuild.
        label_list: Not used by this function.
        dependencies: Files forwarded to ``save_model``; ``None`` means no
            dependencies.
    """
    import math
    from builtin_models.pytorch import save_model

    # Avoid a shared mutable default argument.
    if dependencies is None:
        dependencies = []

    if random_seed is not None:
        if torch.cuda.is_available():
            if torch.cuda.device_count() > 1:
                torch.cuda.manual_seed_all(random_seed)
            else:
                torch.cuda.manual_seed(random_seed)
        else:
            torch.manual_seed(random_seed)

    use_pinned_memory = torch.cuda.is_available()
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=batch_size,
        shuffle=True,
        pin_memory=use_pinned_memory,
        num_workers=0)
    test_loader = torch.utils.data.DataLoader(
        test_set,
        batch_size=batch_size,
        shuffle=False,
        pin_memory=use_pinned_memory,
        num_workers=0)
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = torch.utils.data.DataLoader(
            valid_set,
            batch_size=batch_size,
            shuffle=False,
            pin_memory=use_pinned_memory,
            num_workers=0)
    if torch.cuda.is_available():
        model = model.cuda()

    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model).cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=lr,
                                momentum=momentum,
                                nesterov=True,
                                weight_decay=wd)
    # Milestones are epoch indices.  Rounding up keeps the decay on the
    # first epoch at or after 50% / 75% of training, and avoids fractional
    # milestones that an exact-epoch lookup would never hit.
    milestones = [math.ceil(0.5 * epochs), math.ceil(0.75 * epochs)]
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=0.1)

    results_file = os.path.join(save_path, 'results.csv')
    with open(results_file, 'w') as f:
        f.write(
            'epoch,train_loss,train_error,valid_loss,valid_error,test_error\n')

    best_error = 1
    for epoch in range(epochs):
        scheduler.step()
        _, train_loss, train_error = train_epoch(model=model,
                                                 loader=train_loader,
                                                 optimizer=optimizer,
                                                 epoch=epoch,
                                                 epochs=epochs)
        _, valid_loss, valid_error = test(
            model=model,
            loader=valid_loader if valid_loader else test_loader,
            is_test=not valid_loader)

        # Determine if model is the best
        if valid_loader:
            if valid_error < best_error:
                best_error = valid_error
                print('New best error: {:.4f}'.format(best_error))
                _save_state_dict(model, save_path)
        else:
            # No validation data: always keep the latest weights.
            _save_state_dict(model, save_path)

        curr_path = "."
        print(f'CURRENT PATH = {os.path.abspath(curr_path)}')
        print(f'CURRENT FOLDER = {os.listdir(curr_path)}')
        print(f'DEPENDENCIES = {dependencies}')
        for dep in dependencies:
            print(f'{dep} exists: {os.path.exists(dep)}')
        save_model(model, save_path, dependencies=dependencies)
        print(f'MODEL PATH(OUT) = {save_path}')
        print(f'MODEL FOLDER(OUT) = {os.listdir(save_path)}')
        # Log results (the test_error column stays empty for epoch rows).
        with open(results_file, 'a') as f:
            f.write('{:3d},{:.6f},{:.6f},{:.5f},{:.5f},\n'.format(
                epoch + 1, train_loss, train_error, valid_loss, valid_error))

    # Final evaluation on a freshly built network loaded from the checkpoint.
    model = MyDenseNet(model_type=model_type,
                       pretrained=False,
                       memory_efficient=memory_efficient,
                       classes=num_classes)
    model.load_state_dict(
        torch.load(os.path.join(save_path, 'model.pth'), map_location='cpu'))
    if torch.cuda.is_available():
        model = model.cuda()
        if torch.cuda.device_count() > 1:
            model = torch.nn.DataParallel(model).cuda()
    test_results = test(model=model, loader=test_loader, is_test=True)
    _, _, test_error = test_results
    with open(results_file, 'a') as f:
        f.write(',,,,,{:.5f}\n'.format(test_error))
    print('Final test error: {:.4f}'.format(test_error))
Пример #9
0
def _load_with_module_fallback(load, modules):
    """Call ``load()`` and retry after registering missing modules.

    When ``load`` raises ``ModuleNotFoundError`` and the last dotted
    component of the missing module's name is a key of ``modules``, that
    module object is registered in ``sys.modules`` under the full missing
    name and ``load`` is called again.

    Args:
        load: Zero-argument callable that returns the loaded model.
        modules: Mapping from short module name to module object.

    Returns:
        Whatever ``load`` returns.

    Raises:
        ModuleNotFoundError: If the missing module is unknown, carries no
            name, or is reported missing again after being registered.
    """
    registered = set()
    while True:
        try:
            return load()
        except ModuleNotFoundError as ex:
            missing = ex.name
            # Give up instead of looping forever on the same module.
            if not missing or missing in registered:
                raise
            short_name = missing.rpartition('.')[-1]
            if short_name not in modules:
                raise
            sys.modules[missing] = modules[short_name]
            registered.add(missing)


def load_pytorch(model_file,
                 serialization,
                 out_model_path,
                 model_class_file,
                 init_args=None):
    """Load a PyTorch model in one of three formats and re-save it.

    Args:
        model_file: Path of the serialized model or state dict.
        serialization: ``'cloudpickle'``, ``'savedmodel'`` (``torch.load``
            of a whole model) or ``'statedict'``.
        out_model_path: Destination passed to ``save_model``.
        model_class_file: Optional script defining the model class.  When
            given it is recorded as a dependency and the scripts in its
            folder are loaded with ``load_scripts``.
        init_args: Raw init arguments; ``parse_init`` turns them into the
            class name and the constructor keyword arguments.

    Raises:
        NotImplementedError: For an unknown ``serialization`` value, or
            when the model class cannot be found in ``'statedict'`` mode.
        ModuleNotFoundError: When unpickling needs a module that is not
            among the loaded scripts.
    """
    import torch
    from builtin_models.pytorch import save_model
    dependencies = []
    modules = {}
    if model_class_file:
        dependencies.append(model_class_file)
        basepath = os.path.dirname(model_class_file) or '.'
        modules = load_scripts(basepath)
    class_name, init_args = parse_init(init_args)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f'DEVICE={device}')

    # NOTE(security): cloudpickle.load and torch.load unpickle the file and
    # can execute arbitrary code; only load trusted model files.
    if serialization == 'cloudpickle':
        print(f'model loading(cloudpickle): {model_file} to {out_model_path}')
        import cloudpickle

        def read_model():
            with open(model_file, 'rb') as fp:
                return cloudpickle.load(fp)

        model = _load_with_module_fallback(read_model, modules)
    elif serialization == 'savedmodel':
        print(f'model loading(savedmodel): {model_file} to {out_model_path}')

        def read_model():
            # torch.load opens the path itself; no extra file handle needed.
            return torch.load(model_file, map_location=device)

        model = _load_with_module_fallback(read_model, modules)
    elif serialization == 'statedict':
        print(f'model loading(statedict): {model_file} to {out_model_path}')
        model_class = None
        if class_name:
            for module in modules.values():
                # Default of None lets the search continue past modules that
                # do not define the class.
                model_class = getattr(module, class_name, None)
                if model_class:
                    break
        if not model_class:
            raise NotImplementedError(
                f'model class {class_name!r} not found in loaded scripts')

        print(f'init_args = {init_args}')
        if init_args:
            model = model_class(**init_args)
        else:
            model = model_class()
        print(f'MODEL1 = {model}')
        model.load_state_dict(torch.load(model_file, map_location=device))
        print(f'MODEL2 = {model}')
    else:
        raise NotImplementedError

    print(f'model loaded: {out_model_path}')
    print(f'model={model}, dependencies={dependencies}')
    save_model(model, out_model_path, dependencies=dependencies)
    print(f'MODEL_FOLDER: {os.listdir(out_model_path)}')
Пример #10
0
    def load_pytorch(self, model_file, serialization_mode, init_args):
        """Load a PyTorch model in one of three formats and re-save it.

        The loaded model is written to ``self.out_model_path`` together
        with ``self.dependencies``.

        Args:
            model_file: Path of the serialized model or state dict.
            serialization_mode: ``'cloudpickle'``, ``'savedmodel'``
                (``torch.load`` of a whole model) or ``'statedict'``.
            init_args: Raw init arguments, only parsed in ``'statedict'``
                mode by ``self.parse_pytorch_init_args`` into the class
                name and the constructor keyword arguments.

        Raises:
            NotImplementedError: For an unknown mode, or when the model
                class cannot be determined or found in ``'statedict'`` mode.
            ModuleNotFoundError: When unpickling needs a module that is not
                in ``self.modules``.
        """
        import torch
        from builtin_models.pytorch import save_model
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f'DEVICE={device}')

        def load_with_module_fallback(load):
            # Retry ``load`` after registering, under the missing name, the
            # matching module from self.modules in sys.modules.
            registered = set()
            while True:
                try:
                    return load()
                except ModuleNotFoundError as ex:
                    missing = ex.name
                    # Give up instead of looping forever on the same module.
                    if not missing or missing in registered:
                        raise
                    short_name = missing.rpartition('.')[-1]
                    if short_name not in self.modules:
                        raise
                    sys.modules[missing] = self.modules[short_name]
                    registered.add(missing)

        # NOTE(security): cloudpickle.load and torch.load unpickle the file
        # and can execute arbitrary code; only load trusted model files.
        if serialization_mode == 'cloudpickle':
            print(f'CLOUDPICKLE: {model_file} to {self.out_model_path}')
            import cloudpickle

            def read_model():
                with open(model_file, 'rb') as fp:
                    return cloudpickle.load(fp)

            model = load_with_module_fallback(read_model)
        elif serialization_mode == 'savedmodel':
            print(f'SAVEDMODEL: {model_file} to {self.out_model_path}')

            def read_model():
                # torch.load opens the path itself.
                return torch.load(model_file, map_location=device)

            model = load_with_module_fallback(read_model)
        elif serialization_mode == 'statedict':
            print(f'STATEDICT: {model_file} to {self.out_model_path}')
            class_name, init_args = self.parse_pytorch_init_args(init_args)
            if not class_name:
                if len(self.modules) == 1:
                    # Falls back to the only module's key as the class name.
                    # NOTE(review): assumes the script is named after the
                    # class it defines - confirm.
                    class_name = next(iter(self.modules))
                else:
                    raise NotImplementedError
            print(f'CLASS_NAME={class_name}')
            model_class = None
            for module in self.modules.values():
                model_class = getattr(module, class_name, None)
                if model_class:
                    break
            if not model_class:
                raise NotImplementedError

            print(f'INIT = {init_args}')
            if init_args:
                model = model_class(**init_args)
            else:
                model = model_class()
            model.load_state_dict(torch.load(model_file, map_location=device))
        else:
            raise NotImplementedError

        save_model(model, self.out_model_path, dependencies=self.dependencies)