Example #1
def main(args):

    # get arguments
    rate_num = args.rate_num
    use_side_feature = args.use_side_feature
    lr = args.lr
    weight_decay = args.weight_decay
    num_epochs = args.num_epochs
    hidden_dim = args.hidden_dim
    side_hidden_dim = args.side_hidden_dim
    out_dim = args.out_dim
    drop_out = args.drop_out
    split_ratio = args.split_ratio
    save_steps = args.save_steps
    log_dir = args.log_dir
    saved_model_folder = args.saved_model_folder
    use_data_whitening = args.use_data_whitening
    use_laplacian_loss = args.use_laplacian_loss
    laplacian_loss_weight = args.laplacian_loss_weight

    # create a timestamped log directory and record the training arguments for future analysis
    post_fix = '/' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
    log_dir = log_dir + post_fix
    writer = SummaryWriter(log_dir=log_dir)
    with open(log_dir + '/test.txt', 'a') as f:
        f.write(str(vars(args)))

    print(log_dir)

    # get the prepared data
    feature_u, feature_v, feature_dim, all_M_u, all_M_v, side_feature_u, side_feature_v, all_M, mask, user_item_matrix_train, user_item_matrix_test, laplacian_u, laplacian_v = prepare(
        args)

    if not os.path.exists(saved_model_folder):
        os.makedirs(saved_model_folder)
    weights_name = saved_model_folder + post_fix + '_weights'

    net = utils.create_models(feature_u, feature_v, feature_dim, hidden_dim,
                              rate_num, all_M_u, all_M_v, side_hidden_dim,
                              side_feature_u, side_feature_v, use_side_feature,
                              out_dim, drop_out)
    net.train()  # in train mode

    # create Adam optimizer
    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)
    Loss = utils.loss(all_M, mask, user_item_matrix_train,
                      laplacian_loss_weight)
    iter_bar = tqdm(range(num_epochs), desc='Iter (loss=X.XXX)')
    for epoch in iter_bar:

        optimizer.zero_grad()

        score = net.forward()

        if use_laplacian_loss:
            loss = Loss.laplacian_loss(score, laplacian_u, laplacian_v)
        else:
            loss = Loss.loss(score)

        loss.backward()

        optimizer.step()

        with torch.no_grad():
            rmse = Loss.rmse(score)

            val_rmse = validate(score, rate_num, user_item_matrix_test)
            iter_bar.set_description(
                'Iter (loss=%5.3f, rmse=%5.3f, val_rmse=%5.5f)' %
                (loss.item(), rmse.item(), val_rmse.item()))

            #             writer.add_scalars('scalar',{'loss': loss.item(), 'rmse': rmse.item(), 'val_rmse':val_rmse.item(),},epoch)
            writer.add_scalars('scalar', {'loss': loss.item()}, epoch)

        if epoch % save_steps == 0:
            torch.save(net.state_dict(), weights_name)

    rmse = Loss.rmse(score)
    print('Final training RMSE: ', rmse.data.item())
    torch.save(net.state_dict(), weights_name)

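    # convert the per-rating-class scores into an expected rating:
    # softmax over the rating dimension, then a weighted sum of the rating values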
    sm = nn.Softmax(dim=0)
    score = sm(score)
    score_list = torch.split(score, rate_num)
    pred = 0
    for i in range(rate_num):
        pred += (i + 1) * score_list[0][i]

    pred = utils.var_to_np(pred)

    #     pred = np.load('./prediction.npy')

    ### evaluate performance on the test set
    #     user_item_matrix_test = np.load('./processed_dataset/user_item_matrix_test.npy')
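    # RMSE over observed test entries only: masking pred zeroes out unrated cells,
    # which are also zero in user_item_matrix_test, so they add nothing to the error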
    test_mask = user_item_matrix_test > 0

    square_err = (pred * test_mask - user_item_matrix_test)**2
    mse = square_err.sum() / test_mask.sum()
    test_rmse = np.sqrt(mse)
    print('Test RMSE: ', test_rmse)
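
# A minimal usage sketch (not part of the original example): building the argparse
# namespace that main() expects. The argument names come from the attribute accesses
# above; the default values here are purely illustrative assumptions.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--rate_num', type=int, default=5)
    parser.add_argument('--use_side_feature', type=int, default=0)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--weight_decay', type=float, default=1e-5)
    parser.add_argument('--num_epochs', type=int, default=1000)
    parser.add_argument('--hidden_dim', type=int, default=100)
    parser.add_argument('--side_hidden_dim', type=int, default=10)
    parser.add_argument('--out_dim', type=int, default=75)
    parser.add_argument('--drop_out', type=float, default=0.0)
    parser.add_argument('--split_ratio', type=float, default=0.8)
    parser.add_argument('--save_steps', type=int, default=100)
    parser.add_argument('--log_dir', type=str, default='./log')
    parser.add_argument('--saved_model_folder', type=str, default='./weights')
    parser.add_argument('--use_data_whitening', type=int, default=0)
    parser.add_argument('--use_laplacian_loss', type=int, default=0)
    parser.add_argument('--laplacian_loss_weight', type=float, default=0.05)
    main(parser.parse_args())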
Example #2
# -*- coding: utf-8 -*-
"""
Script that creates the final predictions.
"""

import utils

labels = [0, 1, 2]

# parameters of the classifier
params = {0: ['KS_7', 0.01], 1: ['KS_4', 0.005], 2: ['KS_5', 0.002]}

utils.create_models(labels, params)
Example #3
def test_net(model_choice,
             resize,
             image_size,
             TTA,
             ensemble,
             test_set_output,
             test_with_labels,
             only_test_single,
             test_image_name,
             test_root,
             validate_root,
             num_test=50):
    '''
    Model test, which includes three different tests:
        1. If test_set_output = 1, output the prediction masks for all test images to the directory ./output.
            A submission file is also produced, as required by the competition.
        2. If test_with_labels = 1, evaluate all images in the validation dataset and print the F1 score and average loss.
        3. If only_test_single = 1, test a single image by passing it through the network.
            The original image overlaid with the prediction mask is also saved as test.png.
    
    
    @model_choice: 1 for LinkNet, 2 for D-LinkNet, 3 for D-LinkNet+.    
    @resize: boolean flag for image resizing.    
    @image_size: the size of the input images fed to the network.
    @TTA: boolean flag for test time augmentation.
    @ensemble: boolean flag to enable ensembling at test time.
    @test_set_output: boolean flag for testing all the images in the test dataset.
    @test_with_labels: boolean flag for testing on a validation dataset, with labels provided.
    @only_test_single: boolean flag for testing a single image.
    @test_image_name: the name of the image to be tested.
    @test_root: root directory for test dataset.
    @validate_root: root directory for validation dataset.
    @num_test: number of test images in the test dataset.
    '''

    net = utils.create_models(model_choice)
    linkNet = None
    DlinkNet = None

    weights_name = './parameters/weights' + str(model_choice)
    #    net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    if RUN_ON_GPU:
        net.load_state_dict(torch.load(weights_name))
    else:
        net.load_state_dict(
            torch.load(weights_name,
                       map_location=lambda storage, loc: storage))
    net.eval()

    if ensemble:
        linkNet = utils.create_models(0)
        DlinkNet = utils.create_models(1)
        if RUN_ON_GPU:
            linkNet.load_state_dict(torch.load('./parameters/weights0'))
            DlinkNet.load_state_dict(torch.load('./parameters/weights1'))
        else:
            linkNet.load_state_dict(
                torch.load('./parameters/weights0',
                           map_location=lambda storage, loc: storage))
            DlinkNet.load_state_dict(
                torch.load('./parameters/weights1',
                           map_location=lambda storage, loc: storage))
        linkNet.eval()
        DlinkNet.eval()

    if test_with_labels:
        loss, f1 = test.test_batch_with_labels(net,
                                               validate_root,
                                               resize=resize,
                                               batch_size=1,
                                               image_size=image_size,
                                               smooth=1.0,
                                               lam=1.0)
        print('F1 is evaluated as ', f1)
        print('Average batch loss is ', loss)

    if only_test_single:
        if ensemble:
            mask, image = test.test_single_with_ensemble(linkNet,
                                                         DlinkNet,
                                                         net,
                                                         test_image_name,
                                                         size=image_size,
                                                         resize=resize)
        elif TTA:
            mask, image = test.test_single_with_TTA(net,
                                                    test_image_name,
                                                    size=image_size,
                                                    resize=resize)
        else:
            mask, image = test.test_single_image(net,
                                                 test_image_name,
                                                 size=image_size,
                                                 resize=resize)
        io.imshow(image)
        io.imsave('test.png', image)

    if test_set_output:
        if not os.path.exists('./output'):
            os.makedirs('./output')

        for i in range(1, num_test + 1):
            t = 'test_' + str(i)
            name = test_root + t + '/' + t + '.png'
            if ensemble:
                mask, image = test.test_single_with_ensemble(linkNet,
                                                             DlinkNet,
                                                             net,
                                                             name,
                                                             size=image_size,
                                                             resize=resize)
            elif TTA:
                mask, image = test.test_single_with_TTA(net,
                                                        name,
                                                        size=image_size,
                                                        resize=resize)
            else:
                mask, image = test.test_single_image(net,
                                                     name,
                                                     size=image_size,
                                                     resize=resize)
            io.imsave('./output/' + 'test' + str(i) + '.png', mask)

        submission_filename = 'submission.csv'

        image_filenames = []
        for i in range(1, num_test + 1):
            image_filename = 'output/test' + str(i) + '.png'
            print(image_filename)
            image_filenames.append(image_filename)
        mask_to_submission.masks_to_submission(submission_filename,
                                               *image_filenames)
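
# A minimal invocation sketch (not part of the original example). The paths and the
# image name below are hypothetical; test_net loads the weights that train_net saves
# under ./parameters.
if __name__ == '__main__':
    test_net(model_choice=2,            # D-LinkNet, per the docstring
             resize=False,
             image_size=384,
             TTA=True,
             ensemble=False,
             test_set_output=1,         # write masks to ./output and build the submission file
             test_with_labels=0,
             only_test_single=0,
             test_image_name='./training/images/satImage_001.png',
             test_root='./test_set_images/',
             validate_root='./validation/',
             num_test=50)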
Example #4
def train_net(root,
              resize,
              data_augment,
              rotate,
              change_color,
              lr,
              weight_decay,
              model_choice,
              save_ckpt,
              image_size,
              batch_size,
              num_epochs,
              save_test_image,
              test_image_name,
              early_stop,
              early_stop_tol,
              lr_decay,
              decay_rate,
              decay_period,
              validate_root,
              loss_type='bce',
              smooth=1.0,
              lam=1.0,
              gamma=2.0):
    '''
    Network training, which will output:
        1. a log of the loss at every iteration, saved as a text file (loss.txt).
        2. saved checkpoints containing the trained parameters, in the directory ./parameters.
        3. segmentation results on the test image, saved in the directory ./epoch_output.
    
    Parameters:
        @root: root directory for training dataset.
        @resize: boolean flag for image resizing.
        @data_augment: boolean flag for DA8 (randomly rotate 90 degrees, flip horizontally and vertically).
        @rotate: boolean flag for random rotation to the training images.
        @change_color: boolean flag for random perturbation on HSV channels of the training images.
        @lr: learning rate.
        @weight_decay: weight decay for L2 regularization on the network parameters.
        @model_choice: 1 for LinkNet, 2 for D-LinkNet, 3 for D-LinkNet+.
        @save_ckpt: the period (in epochs) to save the checkpoint of the network.
        @image_size: the size of the training images fed to the network.
        @batch_size: batch size for mini-batch stochastic gradient descent.
        @num_epochs: number of epochs for training.
        @save_test_image: the period (in epochs) to save the prediction of the test image.
        @test_image_name: the name of the test image.
        @early_stop: boolean flag to enable early stopping.
        @early_stop_tol: the tolerance (in number of checkpoint saves) before triggering early stopping.
        @lr_decay: boolean flag for learning rate decay in every decay period.
        @decay_rate: decay ratio for the learning rate, i.e. lr = lr * decay_rate.
        @decay_period: the period in number of epochs to trigger the learning rate decay.
        @validate_root: root directory for validation dataset (mainly for evaluation of network during training).
        @loss_type: either 'bce' (BCE loss) or 'focal' (focal loss).
        @smooth: smoothing constant added to the numerator and denominator when computing the dice loss.
        @lam: weight to balance the dice loss in the final combined loss.
        @gamma: focusing parameter for the focal loss.
    '''

    if os.path.exists('./epoch_output'):
        shutil.rmtree('./epoch_output')
    os.makedirs('./epoch_output')

    if not os.path.exists('./parameters'):
        os.makedirs('./parameters')
    weights_name = './parameters/weights' + str(model_choice)

    net = utils.create_models(model_choice)
    net.train()  # in train mode

    # net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))

    # create AMSGrad optimizer
    optimizer = optim.Adam(net.parameters(),
                           lr=lr,
                           weight_decay=weight_decay,
                           amsgrad=True)
    Loss = utils.loss(smooth, lam, gamma, loss_type)

    dataloader = utils.get_data_loader(root, resize, data_augment, image_size,
                                       batch_size, rotate, change_color)

    num_batch = len(dataloader)
    total_train_iters = num_epochs * num_batch

    loss_history = []
    print('Started training at {}'.format(
        time.asctime(time.localtime(time.time()))))
    test_loss = 100.0
    count = 0
    for epoch in range(num_epochs):
        print('Start epoch ', epoch)
        epoch_loss = 0
        t = time.time()
        for iteration, batch in enumerate(dataloader, epoch * num_batch + 1):
            print('Iteration: ', iteration)
            print('Data loading took: ', time.time() - t, ' s')
            t = time.time()
            image = utils.np_to_var(batch['image'])
            mask = utils.np_to_var(batch['mask'])

            optimizer.zero_grad()

            pred = net.forward(image)

            loss = Loss.final_loss(pred, mask)

            loss.backward()
            optimizer.step()

            epoch_loss += loss.data.item()

            # print the log info
            print('Iteration [{:6d}/{:6d}] | loss: {:.4f}'.format(
                iteration, total_train_iters, loss.data.item()))
            print('Time for the forward/backward pass: ', time.time() - t, ' s')
            loss_history.append(loss.data.item())
            t = time.time()

        # save the test image for visualizing the training outcome
        if (epoch + 1) % save_test_image == 0:
            with torch.no_grad():
                _, test_image = test.test_single_image(net,
                                                       test_image_name,
                                                       resize=False)
            io.imsave('./epoch_output/test_epoch' + str(epoch) + '.png',
                      test_image)

        # early stop
        if early_stop and (epoch + 1) % save_ckpt == 0:
            with torch.no_grad():
                loss, f1 = test.test_batch_with_labels(net,
                                                       validate_root,
                                                       resize=False,
                                                       batch_size=10,
                                                       image_size=image_size,
                                                       smooth=smooth,
                                                       lam=lam)
                print('On the validation dataset, loss: ', loss, ', F1: ', f1)
                if loss <= test_loss:
                    test_loss = loss
                    count = 0
                    torch.save(net.state_dict(), weights_name)
                elif count < early_stop_tol:
                    count += 1
                    lr *= decay_rate
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr
                    print('Validation loss did not improve; decaying the learning rate')
                else:
                    print('Reached the early stopping tolerance...')
                    print('Stopping the update at epoch ', epoch)
                    break

        if not early_stop and (epoch + 1) % save_ckpt == 0:
            with torch.no_grad():
                torch.save(net.state_dict(), weights_name)

        if lr_decay and (epoch + 1) % decay_period == 0:
            with torch.no_grad():
                lr *= decay_rate
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr

        epoch_loss /= num_batch
        print('In the epoch ', epoch, ', the average batch loss is ',
              epoch_loss)

    if not early_stop:
        torch.save(net.state_dict(), weights_name)

    # save the loss history
    with open('loss.txt', 'wt') as file:
        file.write('\n'.join(['{}'.format(loss) for loss in loss_history]))
        file.write('\n')
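
# A minimal invocation sketch (not part of the original example). The hyperparameter
# values and directory paths are illustrative assumptions only.
if __name__ == '__main__':
    train_net(root='./training/',
              resize=False,
              data_augment=True,
              rotate=True,
              change_color=False,
              lr=1e-3,
              weight_decay=1e-5,
              model_choice=2,           # D-LinkNet, per the docstring
              save_ckpt=5,              # save a checkpoint every 5 epochs
              image_size=384,
              batch_size=4,
              num_epochs=100,
              save_test_image=5,
              test_image_name='./training/images/satImage_001.png',
              early_stop=True,
              early_stop_tol=3,
              lr_decay=False,
              decay_rate=0.5,
              decay_period=20,
              validate_root='./validation/',
              loss_type='bce')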
Example #5
def main(args):
    
    # get arguments
    rate_num = args.rate_num
    use_side_feature = args.use_side_feature  # whether to use side features
    use_GAT = args.use_GAT
    lr = args.lr
    weight_decay = args.weight_decay
    num_epochs = args.num_epochs
    hidden_dim = args.hidden_dim
    side_hidden_dim = args.side_hidden_dim
    out_dim = args.out_dim
    drop_out = args.drop_out
    split_ratio = args.split_ratio
    save_steps = args.save_steps
    saved_model_folder = args.saved_model_folder
    laplacian_loss_weight = args.laplacian_loss_weight

    post_fix = '/' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())

    # data preprocessing
    feature_u, feature_v, feature_dim, all_M_u, all_M_v, side_feature_u, side_feature_v, all_M, mask,\
    user_item_matrix_train, user_item_matrix_test, laplacian_u, laplacian_v = prepare(args)

    if not os.path.exists(saved_model_folder):
        os.makedirs(saved_model_folder)  
    weights_name = saved_model_folder + post_fix + '_weights'

    net = utils.create_models(feature_u, feature_v, feature_dim, hidden_dim, rate_num, all_M_u, all_M_v,
                              side_hidden_dim, side_feature_u, side_feature_v,
                              use_side_feature, use_GAT, out_dim, user_item_matrix_train, drop_out)
    net.train()

    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)
    Loss = utils.loss(all_M, mask, user_item_matrix_train, laplacian_loss_weight)
    iter_bar = tqdm(range(num_epochs), desc='Iter (loss=X.XXX)')

    for epoch in iter_bar:

        optimizer.zero_grad()

        score = net.forward()

        loss = Loss.loss(score)

        loss.backward()

        optimizer.step()

        with torch.no_grad():
            rmse = Loss.rmse(score)
            
            val_rmse = validate(score, rate_num, user_item_matrix_test)
            iter_bar.set_description('Iter (loss=%5.3f, rmse=%5.3f, val_rmse=%5.5f)'%(loss.item(), rmse.item(), val_rmse.item()))


        if epoch % save_steps == 0:
            torch.save(net.state_dict(), weights_name)

    rmse = Loss.rmse(score)
    print('Final training RMSE: ', rmse.data.item())        
    torch.save(net.state_dict(), weights_name)
    
    sm = nn.Softmax(dim=0)
    score = sm(score)
    score_list = torch.split(score, rate_num)
    pred = 0
    for i in range(rate_num):
        pred += (i + 1) * score_list[0][i]

    pred = utils.var_to_np(pred)

    test_mask = user_item_matrix_test > 0

    square_err = (pred * test_mask - user_item_matrix_test) ** 2
    mse = square_err.sum() / test_mask.sum()
    test_rmse = np.sqrt(mse)
    print('Test RMSE: ', test_rmse)