Пример #1
0
def predict(model_path, im_path):
    '''
    Predict the letter shown in one image with a saved MLP and print it
    ---------------
    :param model_path: path of the saved model (a checkpoint holding
                       'configs' and 'state_dict')
    :param im_path: path of an image
    :raises FileNotFoundError: if the image at im_path cannot be read
    '''

    # step 1: load the checkpoint and read the model configuration from it.
    # The model is never moved off the CPU in this function, so the tensors
    # are mapped to the CPU while loading; this lets a checkpoint saved from
    # a GPU run be used on a machine without CUDA.
    checkpoint = torch.load(model_path, map_location='cpu')
    configs = checkpoint['configs']
    norm_size = configs['norm_size']
    output_size = configs['output_size']
    hidden_size = configs['hidden_size']
    n_layers = configs['n_layers']
    act_type = configs['act_type']

    # step 2: rebuild the MLP; its input is the flattened normalized image
    model = MLP(norm_size[0] * norm_size[1], output_size, hidden_size,
                n_layers, act_type)

    # step 3: restore the trained parameters and enter the evaluation mode
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    # image pre-processing, similar to what we do in ListDataset()
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(norm_size),
        transforms.ToTensor()
    ])

    # cv2.imread signals failure by returning None instead of raising,
    # so check explicitly to give a clear error message
    im = cv2.imread(im_path)
    if im is None:
        raise FileNotFoundError(
            'cannot read image from {}'.format(im_path))
    im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    im = transform(im)
    # shift and scale the pixel values in place: (x - 0.5) / 0.5
    im.sub_(0.5).div_(0.5)

    # feed the image into the model as a batch holding one flat vector
    with torch.no_grad():
        flat_im = im.view(1, -1)
        out = model(flat_im)
        prediction = out.argmax(1)[0].item()

    # convert index of prediction to the corresponding character
    letters = string.ascii_uppercase  # ABCD...XYZ
    prediction = letters[prediction]

    print('Prediction: {}'.format(prediction))
Пример #2
0
            #     epoch, step, len(train_loader), batch_time=batch_time, loss=losses, top1=top1))

    train_loss = losses.avg
    train_acc = top1.avg
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    # vis.plot_many({'train loss': train_loss, 'train acc': train_acc})

    # validation phase
    print('validation phase')
    # vis.log('validation phase')
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    net.eval()

    end = time.time()
    for step, (images, labels) in enumerate(val_loader):
        images = images.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            logits = images
            # logits = clf(clf_norm(images))
            # logits = logits - logits.max(dim=1, keepdim=True)[0]  # Normalizing by reducing the maximum
            # logits = logits / logits.norm(p=2, dim=1, keepdim=True)  # L2 normalization
            output = net(logits)
            loss = criterion(output, labels)

        _, pred = output.max(dim=1)
Пример #3
0
def test(model_path,
         im_dir='data/character_classification/images',
         test_file_path='data/character_classification/test.json',
         batch_size=8,
         device='cpu'):
    '''
    Test procedure: report accuracy and show a t-SNE plot of the outputs
    ---------------
    :param model_path: path of the saved model
    :param im_dir: path to directory with images
    :param test_file_path: file with test image paths and labels
    :param batch_size: test batch size
    :param device: 'cpu' or 'cuda'
    '''

    # load configurations from saved model; map_location makes the stored
    # tensors land on the requested device, so a checkpoint saved from a
    # GPU run can still be tested with device='cpu'
    checkpoint = torch.load(model_path, map_location=device)
    configs = checkpoint['configs']
    norm_size = configs['norm_size']
    output_size = configs['output_size']
    hidden_size = configs['hidden_size']
    n_layers = configs['n_layers']
    act_type = configs['act_type']

    # initialize the model by MLP()
    model = MLP(norm_size[0] * norm_size[1], output_size, hidden_size,
                n_layers, act_type)

    # load model parameters we saved in model_path
    model.load_state_dict(checkpoint['state_dict'])
    model = model.to(device)
    print('[Info] Load model from {}'.format(model_path))

    # enter the evaluation mode
    model.eval()

    # test loader
    testloader = dataLoader(im_dir, test_file_path, norm_size, batch_size)

    # counters are kept as plain Python numbers (via .item()) so the final
    # accuracy is an ordinary float no matter which device was used
    n_correct = 0.
    n_ims = 0.

    # model outputs and labels of every batch, collected for the t-SNE plot
    logits = []
    all_labels = []

    # we do not need to compute gradients during test stage
    with torch.no_grad():
        for ims, labels in testloader:
            ims, labels = ims.to(device), labels.type(torch.float).to(device)
            # flatten every image of the batch into one vector
            flat_ims = ims.view(ims.size(0), -1)
            out = model(flat_ims)
            predictions = out.argmax(1)
            n_correct += (predictions == labels).sum().item()
            n_ims += ims.size(0)

            logits.append(out)
            all_labels.append(labels)

    logits = torch.cat(logits, dim=0).detach().cpu().numpy()
    all_labels = torch.cat(all_labels, dim=0).cpu().numpy()

    # embed the model outputs into 2-D
    tsne = TSNE(n_components=2, init='pca')
    Y = tsne.fit_transform(logits)

    letters = list(string.ascii_uppercase)  # ABCD...XYZ
    n_letters = len(letters)
    # min-max rescale each embedding axis to [0, 1]
    Y = (Y - Y.min(0)) / (Y.max(0) - Y.min(0))
    # draw every sample as its ground-truth letter, one color per class
    for (x, y), label in zip(Y, all_labels):
        if label < n_letters:
            c = plt.cm.rainbow(float(label) / n_letters)
            plt.text(x, y, s=letters[int(label)], color=c)
    plt.show()

    print('[Info] Test accuracy = {:.1f}%'.format(100 * n_correct / n_ims))
Пример #4
0
def train_val(im_dir,
              train_file_path,
              val_file_path,
              hidden_size,
              n_layers,
              act_type,
              norm_size,
              n_epochs,
              batch_size,
              n_letters,
              lr,
              optim_type,
              momentum,
              weight_decay,
              valInterval,
              device='cpu'):
    '''
    The main training procedure: train the MLP, validate it periodically,
    save the final checkpoint and plot the training loss curve
    ----------------------------
    :param im_dir: path to directory with images
    :param train_file_path: file list of training image paths and labels
    :param val_file_path: file list of validation image paths and labels
    :param hidden_size: a list of hidden size for each hidden layer
    :param n_layers: number of layers in the MLP
    :param act_type: type of activation function, can be none, sigmoid, tanh, or relu
    :param norm_size: image normalization size, (height, width)
    :param n_epochs: number of training epochs
    :param batch_size: batch size of training and validation
    :param n_letters: number of classes, in this task it is 26 English letters
    :param lr: learning rate
    :param optim_type: optimizer, can be 'sgd', 'adagrad', 'rmsprop', 'adam', or 'adadelta'
    :param momentum: only used if optim_type == 'sgd'
    :param weight_decay: the factor of L2 penalty on network weights
    :param valInterval: the frequency of validation, e.g., if valInterval = 5, then do validation after each 5 training epochs
    :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
    :raises NotImplementedError: if optim_type is not a supported optimizer
    '''
    # local import: only needed to create the checkpoint directory
    import os

    # training and validation data loader
    trainloader = dataLoader(im_dir, train_file_path, norm_size, batch_size)
    valloader = dataLoader(im_dir, val_file_path, norm_size, batch_size)

    # the MLP takes a flattened image, so its input size is height * width
    model = MLP(norm_size[0] * norm_size[1], n_letters, hidden_size, n_layers,
                act_type)
    # loss function
    cal_loss = CrossEntropyLoss.apply
    # put the model on CPU or GPU
    model = model.to(device)

    # optimizer: SGD is the only one that takes momentum, all the others
    # share the same (params, lr, weight_decay) call
    if optim_type == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr,
                              momentum=momentum,
                              weight_decay=weight_decay)
    else:
        optimizer_classes = {
            'adagrad': optim.Adagrad,
            'rmsprop': optim.RMSprop,
            'adam': optim.Adam,
            'adadelta': optim.Adadelta,
        }
        if optim_type not in optimizer_classes:
            raise NotImplementedError(
                '[Error] optim_type should be one of sgd, adagrad, rmsprop, adam, or adadelta'
            )
        optimizer = optimizer_classes[optim_type](model.parameters(),
                                                  lr,
                                                  weight_decay=weight_decay)

    # training
    # to save loss of each training epoch in a python "list" data structure
    losses = []

    for epoch in range(n_epochs):
        # set the model in training mode
        model.train()

        # to save total loss in one epoch
        total_loss = 0.

        for ims, labels in trainloader:  # get a batch of data
            # step 1: set device
            ims = ims.to(device)
            labels = labels.to(device)
            # step 2: convert each image to a vector as the input of the MLP.
            # The batch dimension is read from the tensor itself because the
            # last batch of an epoch may hold fewer than batch_size images.
            ims = ims.view(ims.size(0), -1)
            # clear gradients left over from the previous step
            optimizer.zero_grad()
            # step 3: run the model which is the forward process
            pred = model(ims)
            # step 4: compute the loss, and call backward propagation function
            loss = cal_loss(pred, labels)
            loss.backward()
            # step 5: sum up of total loss, loss.item() return the value of
            # the tensor as a standard python number
            total_loss += loss.item()
            # step 6: update the parameters of the model
            optimizer.step()

        # average of the total loss for iterations
        avg_loss = total_loss / len(trainloader)
        losses.append(avg_loss)
        print('Epoch {:02d}: loss = {:.3f}'.format(epoch + 1, avg_loss))

        # validation
        if (epoch + 1) % valInterval == 0:

            # set the model in evaluation mode
            model.eval()

            n_correct = 0.  # number of images that are correctly classified
            n_ims = 0.  # number of total images

            # we do not need to compute gradients during validation
            with torch.no_grad():
                for ims, labels in valloader:
                    # set data type and device
                    ims, labels = ims.to(device), labels.type(
                        torch.float).to(device)

                    # convert an image to a vector as the input of the MLP
                    flat_ims = ims.view(ims.size(0), -1)

                    # run the model which is the forward process
                    out = model(flat_ims)

                    # the predicted class is the index of the largest output
                    predictions = out.argmax(1)

                    # count correct predictions as a plain Python number so
                    # the accuracy below is an ordinary float on any device
                    n_correct += (predictions == labels).sum().item()
                    n_ims += ims.size(0)

            # show prediction accuracy
            print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(
                epoch + 1, 100 * n_correct / n_ims))

    # save model parameters in a file; make sure the target directory
    # exists so that the trained weights are not lost at the very end
    model_save_path = 'saved_models/recognition.pth'
    os.makedirs(os.path.dirname(model_save_path), exist_ok=True)

    torch.save(
        {
            'state_dict': model.state_dict(),
            'configs': {
                'norm_size': norm_size,
                'output_size': n_letters,
                'hidden_size': hidden_size,
                'n_layers': n_layers,
                'act_type': act_type
            }
        }, model_save_path)
    print('Model saved in {}\n'.format(model_save_path))

    # draw the loss curve
    plot_loss(losses)