Example #1
def main():
    fold = 0
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep + str(fold)):
        os.makedirs(config.weights + config.model_name + os.sep + str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")
    
    # 4.2 get model
    model = get_net()
    model.cuda()

    # criterion
    optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=0.9, weight_decay=1e-4)
    criterion = nn.BCEWithLogitsLoss().cuda()
    start_epoch = 0
    best_loss = 999
    best_f1 = 0
    best_results = [np.inf,0]
    val_metrics = [np.inf,0]
    resume = False
    all_files = pd.read_csv("./train.csv")
    #test_files = pd.read_csv("./sample_submission.csv")
    train_data_list, val_data_list = train_test_split(all_files, test_size=0.13, random_state=2050)
    # load dataset
    train_gen = HumanDataset(train_data_list, config.train_data, mode="train")
    train_loader = DataLoader(train_gen, batch_size=config.batch_size, shuffle=True, pin_memory=True, num_workers=4)

    val_gen = HumanDataset(val_data_list, config.train_data, augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=4)

    #test_gen = HumanDataset(test_files,config.test_data,augument=False,mode="test")
    #test_loader = DataLoader(test_gen,1,shuffle=False,pin_memory=True,num_workers=4)

    scheduler = lr_scheduler.StepLR(optimizer,step_size=7,gamma=0.1)
    start = timer()
    
    #train
    for epoch in range(0, config.epochs):
        scheduler.step(epoch)
        # train
        lr = get_learning_rate(optimizer)
        train_metrics = train(train_loader, model, criterion, optimizer, epoch, val_metrics, best_results, start)
        # val
        val_metrics = evaluate(val_loader, model, criterion, epoch, train_metrics, best_results, start)
        # check results
        is_best_loss = val_metrics[0] < best_results[0]
        best_results[0] = min(val_metrics[0], best_results[0])
        is_best_f1 = val_metrics[1] > best_results[1]
        best_results[1] = max(val_metrics[1], best_results[1])
        # save model
        save_checkpoint({
                    "epoch":epoch + 1,
                    "model_name":config.model_name,
                    "state_dict":model.state_dict(),
                    "best_loss":best_results[0],
                    "optimizer":optimizer.state_dict(),
                    "fold":fold,
                    "best_f1":best_results[1],
        },is_best_loss,is_best_f1,fold)
        print('\r',end='',flush=True)
        log.write('%s  %5.1f %6.1f         |         %0.3f  %0.3f           |         %0.3f  %0.4f         |         %s  %s    | %s' % (\
                "best", epoch, epoch,                    
                train_metrics[0], train_metrics[1], 
                val_metrics[0], val_metrics[1],
                str(best_results[0])[:8],str(best_results[1])[:8],
                time_to_str((timer() - start),'min'))
            )
        log.write("\n")
        time.sleep(0.01)
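# get_learning_rate() is a project helper that is not shown in these snippets.
# A minimal sketch of what it presumably does (an assumption, not the project's
# actual code): read the current learning rate back out of the optimizer so it
# can be logged once per epoch.
def get_learning_rate(optimizer):
    # every PyTorch optimizer keeps its hyper-parameters in param_groups
    return optimizer.param_groups[0]['lr']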
def training(model, fold, log, train_image_names, train_image_labels, val_image_names, val_image_labels):
    # logging issues
    log.write(
        "\n---------------------------- [START %s] %s\n\n" % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '-' * 20))

    log.write(
        '----------------------|--------- Train ---------|-------- Valid ---------|-------Best '
        'Results-------|----------|\n')
    log.write(
        'mode   iter   epoch   |      loss   f1_macro    |      loss   f1_macro   |       loss   f1_macro    | time   '
        '  |\n')
    log.write(
        '----------------------------------------------------------------------------------------------------------'
        '----\n')

    # training params
    optimizer = optim.SGD(model.parameters(),
                          lr=config.learning_rate_start,
                          momentum=0.9,
                          weight_decay=config.weight_decay)
    if config.loss_name == 'ce':
        criterion = nn.BCEWithLogitsLoss().cuda()
    elif config.loss_name == 'focal':
        criterion = FocalLoss().cuda()
    elif config.loss_name == 'f1':
        criterion = F1Loss().cuda()
    else:
        raise ValueError('unknown loss name {}'.format(config.loss_name))
    best_results = [np.inf, 0]
    val_metrics = [np.inf, 0]
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=config.learning_rate_decay_epochs,
                                    gamma=config.learning_rate_decay_rate)
    start = timer()

    train_gen = HumanDataset(train_image_names, train_image_labels, config.train_dir, mode="train")
    train_loader = DataLoader(train_gen, batch_size=config.batch_size, shuffle=True, pin_memory=True, num_workers=4)
    val_gen = HumanDataset(val_image_names, val_image_labels, config.train_dir, augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=4)

    # train
    for epoch in range(0, config.epochs):
        # training & evaluating
        scheduler.step(epoch)
        get_learning_rate(optimizer)
        train_metrics = train(train_loader, model, criterion, optimizer, epoch, val_metrics, best_results, start)
        val_metrics = evaluate(val_loader, model, criterion, epoch, train_metrics, best_results, start)

        # check results
        is_best_loss = val_metrics[0] < best_results[0]
        best_results[0] = min(val_metrics[0], best_results[0])
        is_best_f1 = val_metrics[1] > best_results[1]
        best_results[1] = max(val_metrics[1], best_results[1])

        # save model
        save_checkpoint({
            "epoch": epoch + 1,
            "model_name": config.model_name,
            "state_dict": model.state_dict(),
            "best_loss": best_results[0],
            "optimizer": optimizer.state_dict(),
            "fold": fold,
            "best_f1": best_results[1],
        }, is_best_loss, is_best_f1, fold)

        # print logs
        print('\r', end='', flush=True)
        log.write(
            logging_pattern % (
                "best", epoch, epoch,
                train_metrics[0], train_metrics[1],
                val_metrics[0], val_metrics[1],
                str(best_results[0])[:8], str(best_results[1])[:8],
                time_to_str((timer() - start), 'min')
            )
        )
        log.write("\n")
        time.sleep(0.01)
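# FocalLoss and F1Loss are referenced above but never defined in these snippets.
# Below is a minimal sketch of a multi-label focal loss over raw logits, written
# as an assumption about what such a criterion typically looks like (gamma=2 is
# the common default); it is not the project's actual implementation.
import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLossSketch(nn.Module):
    def __init__(self, gamma=2.0):
        super().__init__()
        self.gamma = gamma

    def forward(self, logits, targets):
        # per-element BCE on logits, then down-weight easy examples by (1 - p_t)**gamma
        bce = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')
        p_t = torch.exp(-bce)  # probability assigned to the true label
        return ((1.0 - p_t) ** self.gamma * bce).mean()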
Example #3
def find_lr(init_value=1e-8, final_value=10., beta=0.98):
    # 1. load dataset
    all_files = pd.read_csv(config.CSV_TRAIN)
    train_data_list, _  = multilabel_stratification(all_files, test_size=0.2, random_state=42)
    train_gen = HumanDataset(train_data_list, config.train_data, mode="train")
    train_loader = DataLoader(train_gen, batch_size=config.batch_size, shuffle=True, pin_memory=True, num_workers=8)

    # 2. get the model, and set the optimizer and criterion
    model = get_net()
    model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=init_value, momentum=0.9, weight_decay=1e-4)
    criterion = nn.BCEWithLogitsLoss(weight=opt_class_weight).cuda()

    # 3.set init value
    num = len(train_loader) - 1                             # num = samples_per_epoch / batch_size
    mult = (final_value / init_value) ** (1/num)            # init_value * (mult)**num ==> final_value

    lr = init_value
    optimizer.param_groups[0]['lr'] = lr
    avg_loss = 0.
    best_loss = 0.
    batch_num = 0
    losses = []
    log_lrs = []

    best_lr = 111  # arbitrary placeholder; overwritten on the first batch below

    model.train()
    model.zero_grad()
    
    for i,(images,target) in enumerate(train_loader):
        batch_num += 1

        # 0. get the loss of this batch
        images = images.cuda(non_blocking=True)
        target = torch.from_numpy(np.array(target)).float().cuda(non_blocking=True)
        output = model(images)
        loss = criterion(output,target)

        # 1. Compute the smoothed loss
        avg_loss = beta * avg_loss + (1 - beta) * loss.item()
        smoothed_loss = avg_loss / (1 - beta**batch_num)

        # 2. Stop if the loss is exploding
        if batch_num > 1 and smoothed_loss > 4 * best_loss:
            return log_lrs, losses
        # 3. Record the best loss
        if smoothed_loss < best_loss or batch_num==1:
            best_loss = smoothed_loss
            best_lr = lr
        # 4. Store the values
        losses.append(smoothed_loss)
        log_lrs.append(math.log10(lr))


        # 5. Do the SGD step
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # 6. Update the lr for the next step
        lr *= mult
        optimizer.param_groups[0]['lr'] = lr

        print('%d:  factor:%.3f  smoothed_loss:%f  best_loss:%f  lr:%f  best_lr:%f' % (i, smoothed_loss / best_loss, smoothed_loss, best_loss, lr, best_lr))
    return log_lrs, losses
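# Usage sketch for find_lr() above (not part of the original example): plot the
# smoothed loss against log10(lr) and pick a learning rate roughly an order of
# magnitude below the loss minimum. Assumes matplotlib is available.
def plot_lr_finder(log_lrs, losses, out_path="lr_finder.png"):
    import matplotlib.pyplot as plt
    # skip the first few noisy points and the last few exploding ones, as is customary
    plt.plot(log_lrs[10:-5], losses[10:-5])
    plt.xlabel("log10(learning rate)")
    plt.ylabel("smoothed loss")
    plt.savefig(out_path)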
def main():
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep +
                          'fold_' + str(config.fold)):
        os.makedirs(config.weights + config.model_name + os.sep + 'fold_' +
                    str(config.fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists(config.logs):
        os.mkdir(config.logs)

    all_files = pd.read_csv("./input/train.csv")

    # -------------------------------------------------------
    # training
    # -------------------------------------------------------
    if config.mode == 'train':

        for fold in range(config.fold):

            # 4.2 get model
            model = get_net()
            model.cuda()

            optimizer = optim.Adam(model.parameters(), lr=config.lr)

            # ================================================================== #
            #                        Loss criterioin                             #
            # ================================================================== #
            # criterion
            # optimizer = optim.SGD(model.parameters(),lr = config.lr,momentum=0.9,weight_decay=1e-4)

            # Use the optim package to define an Optimizer that will update the weights of
            # the model for us. Here we will use Adam; the optim package contains many other
            # optimization algorithms. The first argument to the Adam constructor tells the
            # optimizer which Tensors it should update.
            assert config.loss in ['bcelog', 'f1_loss', 'focal_loss'], \
                "Loss type {0} is unknown".format(config.loss)
            if config.loss == 'bcelog':
                criterion = nn.BCEWithLogitsLoss().cuda()
            elif config.loss == 'f1_loss':
                criterion = F1_loss().cuda()
            elif config.loss == 'focal_loss':
                criterion = FocalLoss().cuda()

            # best_loss = 999
            # best_f1 = 0
            best_results = [np.inf, 0]
            val_metrics = [np.inf, 0]

            ## k-fold--------------------------------

            # tflogger
            tflogger = TFLogger(
                os.path.join(
                    'results', 'TFlogs', config.model_name +
                    "_fold{0}_{1}".format(config.fold, fold)))

            with open(
                    os.path.join(
                        "./input/fold_{0}".format(config.fold),
                        'train_fold{0}_{1}.txt'.format(config.fold, fold)),
                    'r') as text_file:
                train_names = text_file.read().split('\n')
                # # oversample
                # s = Oversampling("./input/train.csv")
                # train_names = [idx for idx in train_names for _ in range(s.get(idx))]
                train_data_list = all_files[all_files['Id'].isin(train_names)]
                # train_data_list = all_files.copy().set_index('Id')
                # train_data_list
                # train_data_list = train_data_list.reindex(train_names)
                # 57150 -> 29016
                # reset index
                # train_data_list = train_data_list.rename_axis('Id').reset_index()
            with open(
                    os.path.join(
                        "./input/fold_{0}".format(config.fold),
                        'test_fold{0}_{1}.txt'.format(config.fold, fold)),
                    'r') as text_file:
                val_names = text_file.read().split('\n')
                val_data_list = all_files[all_files['Id'].isin(val_names)]

            # load dataset
            train_gen = HumanDataset(train_data_list,
                                     config.train_data,
                                     mode="train")
            train_loader = DataLoader(train_gen,
                                      batch_size=config.batch_size,
                                      shuffle=True,
                                      pin_memory=True,
                                      num_workers=4)

            val_gen = HumanDataset(val_data_list,
                                   config.train_data,
                                   augument=False,
                                   mode="train")
            val_loader = DataLoader(val_gen,
                                    batch_size=config.batch_size,
                                    shuffle=False,
                                    pin_memory=True,
                                    num_workers=4)

            # initialize the early_stopping object
            early_stopping = EarlyStopping(patience=7, verbose=True)

            if config.resume:
                log.write('\tinitial_checkpoint = %s\n' %
                          config.initial_checkpoint)
                checkpoint_path = os.path.join(config.weights,
                                               config.model_name, config.fold,
                                               config.initial_checkpoint,
                                               'checkpoint.pth.tar')
                loaded_model = torch.load(checkpoint_path)
                model.load_state_dict(loaded_model["state_dict"])
                start_epoch = loaded_model["epoch"]
            else:
                start_epoch = 0

            scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
            start = timer()

            # train
            for epoch in range(start_epoch, config.epochs):
                scheduler.step(epoch)
                # train
                lr = get_learning_rate(optimizer)
                train_metrics = train(train_loader, model, criterion,
                                      optimizer, epoch, val_metrics,
                                      best_results, start, config.threshold)
                # val
                val_metrics = evaluate(val_loader, model, criterion, epoch,
                                       train_metrics, best_results, start,
                                       config.threshold)
                # check results
                is_best_loss = val_metrics[0] < best_results[0]
                best_results[0] = min(val_metrics[0], best_results[0])
                is_best_f1 = val_metrics[1] > best_results[1]
                best_results[1] = max(val_metrics[1], best_results[1])
                # save model
                save_checkpoint(
                    {
                        "epoch": epoch + 1,
                        "model_name": config.model_name,
                        "state_dict": model.state_dict(),
                        "best_loss": best_results[0],
                        "optimizer": optimizer.state_dict(),
                        "fold": config.fold,
                        "kfold": fold,
                        "best_f1": best_results[1],
                    }, is_best_loss, is_best_f1, config.fold, fold)
                # print logs
                print('\r', end='', flush=True)

                log.write(
                    '%s  %5.1f %6.1f  %.2E|  %0.3f   %0.3f    |   %0.3f    %0.4f   |  %s      %s       | %s      |%s ' % ( \
                        "best", epoch, epoch, Decimal(lr),
                        train_metrics[0], train_metrics[1],
                        val_metrics[0], val_metrics[1],
                        str(best_results[0])[:8], str(best_results[1])[:8],
                        time_to_str((timer() - start), 'min'),
                        fold),
                )
                log.write("\n")
                time.sleep(0.01)

                # ================================================================== #
                #                        Tensorboard Logging                         #
                # ================================================================== #

                # 1. Log scalar values (scalar summary)
                info = {
                    'Train_loss': train_metrics[0],
                    'Train_F1_macro': train_metrics[1],
                    'Valid_loss': val_metrics[0],
                    'Valid_F1_macro': val_metrics[1],
                    'Learning_rate': lr
                }

                for tag, value in info.items():
                    tflogger.scalar_summary(tag, value, epoch)

                # 2. Log values and gradients of the parameters (histogram summary)
                for tag, value in model.named_parameters():
                    tag = tag.replace('.', '/')
                    tflogger.histo_summary(tag,
                                           value.data.cpu().numpy(), epoch)
                    tflogger.histo_summary(tag + '/grad',
                                           value.grad.data.cpu().numpy(),
                                           epoch)
                # -------------------------------------
                # end tflogger

                # ================================================================== #
                #                        Early stopping                         #
                # ================================================================== #
                # early_stopping needs the validation loss to check if it has decreased,
                # and if it has, it will make a checkpoint of the current model
                early_stopping(val_metrics[1], model)

                if early_stopping.early_stop:
                    print("Early stopping")
                    break
        #==========================================================#
        #End of k-fold
        # ==========================================================#

    # -------------------------------------------------------
    # testing
    # -------------------------------------------------------
    elif config.mode == 'test':
        test_files = pd.read_csv("./input/sample_submission.csv")
        test_gen = HumanDataset(test_files,
                                config.test_data,
                                augument=False,
                                mode="test")
        test_loader = DataLoader(test_gen,
                                 1,
                                 shuffle=False,
                                 pin_memory=True,
                                 num_workers=4)

        # checkpoint_path = os.path.join(config.best_models,'{0}_fold_{1}_model_best_loss.pth.tar'.format(config.model_name, fold))
        checkpoint_path = os.path.join(
            config.weights, config.model_name, 'fold_{0}'.format(fold),
            'checkpoint_{}.pth.tar'.format(config.checkpoint))
        best_model = torch.load(checkpoint_path)
        # best_model = torch.load("checkpoints/bninception_bcelog/0/checkpoint.pth.tar")
        model.load_state_dict(best_model["state_dict"])
        thresholds = [
            -0.13432257, -0.4642075, -0.50726506, -0.49715518, -0.41125674,
            0.11581507, -1.0143597, -0.18461785, -0.61600877, -0.47275479,
            -0.9142859, -0.44323673, -0.58404387, -0.22959213, -0.26110631,
            -0.43723898, -0.97624685, -0.44612319, -0.4492785, -0.56681327,
            -0.16156543, -0.12577745, -0.75476121, -0.91473052, -0.53361931,
            -0.19337344, -0.0857145, -0.45739976
        ]

        # thresholds = [-0.27631527, -0.31156957, -0.61893745, -1.01863398, -0.3141709,  -0.14000374,
        #               -0.6285302,  -0.43241383, -1.60594984, -0.14425374, -0.03979607, -0.25717957,
        #               -0.84905692, -0.37668712,  1.3710663,  -0.11193908, -0.81109447,  0.72506607,
        #               -0.05454339, -0.47056617, -0.16024197, -0.44002794, -0.65929407, -1.00900269,
        #               -0.86197429, -0.12346229, -0.4946575,  -0.52420557]
        test(test_loader, model, thresholds)
        print('Test successful!')
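# The `thresholds` list above holds one decision value per class and, judging by
# the negative entries, is applied to raw logits rather than sigmoid probabilities.
# A minimal sketch of how such per-class thresholds are typically turned into a
# binary prediction (an assumption; the actual logic lives inside the project's test()):
import numpy as np

def apply_thresholds(logits, thresholds):
    # logits: (n_samples, n_classes); thresholds: length n_classes
    return (np.asarray(logits) > np.asarray(thresholds)).astype(int)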
Example #5
def main():
    fold = 8
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep +
                          str(fold)):
        os.makedirs(config.weights + config.model_name + os.sep + str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")

    # 4.2 get model
    model = get_net()
    model.cuda()

    # criterion
    optimizer = optim.SGD(model.parameters(),
                          lr=config.lr,
                          momentum=0.9,
                          weight_decay=1e-4)  #,nesterov=True)
    criterion = nn.BCEWithLogitsLoss().cuda()
    #criterion = FocalLoss().cuda()
    #criterion = F1Loss().cuda()
    start_epoch = 0
    best_loss = 999
    best_f1 = 0
    best_results = [np.inf, 0]
    val_metrics = [np.inf, 0]
    resume = False
    #all_files = pd.read_csv("./train.csv")
    train_df = pd.read_csv("./train_appended2.csv")
    train_df_orig = pd.read_csv("./total_train.csv")
    """print (type(train_df_orig))
    lows = [15,15,15,8,9,10,8,9,10,8,9,10,17,20,24,26,15,27,15,20,24,17,8,15,27,27,27]
    for i in lows:
        target = str(i)
        indicies = train_df_orig.loc[train_df_orig['Target'] == target].index
        train_df = pd.concat([train_df,train_df_orig.loc[indicies]], ignore_index=True)
        indicies = train_df_orig.loc[train_df_orig['Target'].str.startswith(target+" ")].index
        train_df = pd.concat([train_df,train_df_orig.loc[indicies]], ignore_index=True)
        indicies = train_df_orig.loc[train_df_orig['Target'].str.endswith(" "+target)].index
        train_df = pd.concat([train_df,train_df_orig.loc[indicies]], ignore_index=True)
        indicies = train_df_orig.loc[train_df_orig['Target'].str.contains(" "+target+" ")].index
        train_df = pd.concat([train_df,train_df_orig.loc[indicies]], ignore_index=True)
    #print(train_df)
    #input()"""
    test_files = pd.read_csv("./sample_submission.csv")
    train_data_list, val_data_list = train_test_split(train_df,
                                                      test_size=0.13,
                                                      random_state=2050)
    train_data_list_fake, val_data_list_fake = train_test_split(
        train_df_orig, test_size=0.01, random_state=2050)

    # load dataset
    train_gen = HumanDataset(train_data_list, config.train_data, mode="train")
    train_loader = DataLoader(train_gen,
                              batch_size=config.batch_size,
                              shuffle=True,
                              pin_memory=True,
                              num_workers=4)

    val_gen = HumanDataset(val_data_list,
                           config.train_data,
                           augument=False,
                           mode="train")
    val_loader = DataLoader(val_gen,
                            batch_size=config.batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=4)

    test_gen = HumanDataset(test_files,
                            config.test_data,
                            augument=False,
                            mode="test")
    test_loader = DataLoader(test_gen,
                             1,
                             shuffle=False,
                             pin_memory=True,
                             num_workers=4)

    scheduler = lr_scheduler.StepLR(optimizer, step_size=8, gamma=0.1)
    start = timer()

    #train
    for epoch in range(0, config.epochs):
        scheduler.step(epoch)
        # train
        lr = get_learning_rate(optimizer)
        train_metrics = train(train_loader, model, criterion, optimizer, epoch,
                              val_metrics, best_results, start)
        # val
        val_metrics = evaluate(val_loader, model, criterion, epoch,
                               train_metrics, best_results, start)
        # check results
        is_best_loss = val_metrics[0] < best_results[0]
        best_results[0] = min(val_metrics[0], best_results[0])
        is_best_f1 = val_metrics[1] > best_results[1]
        best_results[1] = max(val_metrics[1], best_results[1])
        # save model
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "model_name": config.model_name,
                "state_dict": model.state_dict(),
                "best_loss": best_results[0],
                "optimizer": optimizer.state_dict(),
                "fold": fold,
                "best_f1": best_results[1],
            }, is_best_loss, is_best_f1, fold)
        # print logs
        print('\r', end='', flush=True)
        log.write('%s  %5.1f %6.1f         |         %0.3f  %0.3f           |         %0.3f  %0.4f         |         %s  %s    | %s' % (\
                "best", epoch, epoch,
                train_metrics[0], train_metrics[1],
                val_metrics[0], val_metrics[1],
                str(best_results[0])[:8],str(best_results[1])[:8],
                time_to_str((timer() - start),'min'))
            )
        log.write("\n")
        time.sleep(0.01)
        model.load_state_dict(
            torch.load(
                'checkpoints/best_models/%s_fold_%d_model_best_f1.pth.tar' %
                (config.model_name, fold))['state_dict'])

        model.cuda()

        criterion = nn.BCEWithLogitsLoss().cuda()
        optimizer = optim.Adam(model.parameters(), lr=1e-3)
        scheduler = ReduceLROnPlateau(optimizer,
                                      factor=0.5,
                                      patience=2,
                                      min_lr=1e-5)

        train_gen = HumanDataset(X_train, y_train, augment=True)
        val_gen = HumanDataset(X_val, y_val, augment=False)

        train_loader = torch.utils.data.DataLoader(
            train_gen,
            batch_size=config.batch_size,
            shuffle=True,
            num_workers=6,
            pin_memory=True)
        val_loader = torch.utils.data.DataLoader(val_gen,
                                                 batch_size=config.batch_size,
                                                 num_workers=6,
                                                 pin_memory=True)
        #
        allPred = featExt(val_loader, model)
        break
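# featExt() is not shown in these snippets. A plausible minimal sketch of a
# feature/prediction extraction pass (an assumption about its behaviour, not the
# project's actual code): run the model over a loader without gradients and
# stack the outputs.
import numpy as np
import torch

def feat_ext_sketch(loader, model):
    model.eval()
    outputs = []
    with torch.no_grad():
        for images, _ in loader:
            images = images.cuda(non_blocking=True)
            outputs.append(model(images).cpu().numpy())
    return np.concatenate(outputs, axis=0)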
Example #7
        # train_data_list
        train_data_list = train_data_list.reindex(train_names)
        # 57150 -> 29016
        # reset index
        train_data_list = train_data_list.rename_axis('Id').reset_index()
    with open(os.path.join("./input/protein-trainval-split", 'val_names.txt'), 'r') as text_file:
        val_names = text_file.read().split(',')
        val_data_list = all_files[all_files['Id'].isin(val_names)]

    # 4.2 get model
    model = get_net()
    model.cuda()
    fold = 0

    # load dataset
    train_gen = HumanDataset(train_data_list, config.train_data, mode="train")
    train_loader = DataLoader(train_gen, batch_size=config.batch_size, shuffle=True, pin_memory=True, num_workers=4)

    val_gen = HumanDataset(val_data_list, config.train_data, augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=4)


    # checkpoint_path = os.path.join(config.best_models,'{0}_fold_{1}_model_best_loss.pth.tar'.format(config.model_name, fold))
    checkpoint_path = os.path.join(config.weights, config.model_name, 'fold_{0}'.format(fold),
                                   'checkpoint_{}.pth.tar'.format(config.checkpoint))
    best_model = torch.load(checkpoint_path)
    #best_model = torch.load("checkpoints/bninception_bcelog/0/checkpoint.pth.tar")
    model.load_state_dict(best_model["state_dict"])


    preds, y = validate(val_loader, model)
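# With per-sample predictions and labels from validate(), per-class thresholds
# (like the hard-coded list in the earlier example) can be searched by maximising
# macro F1 one class at a time. A minimal sketch under that assumption; the
# project's own search_thresholds() may work differently.
import numpy as np
from sklearn.metrics import f1_score

def search_class_thresholds(preds, y, candidates=np.arange(-1.0, 1.0, 0.01)):
    # preds: (n_samples, n_classes) logits; y: (n_samples, n_classes) binary labels
    best = np.zeros(preds.shape[1])
    for c in range(preds.shape[1]):
        scores = [f1_score(y[:, c], (preds[:, c] > t).astype(int)) for t in candidates]
        best[c] = candidates[int(np.argmax(scores))]
    return best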
Example #8
def main():
    fold = config.fold
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep +
                          str(fold)):
        os.makedirs(config.weights + config.model_name + os.sep + str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")

    # 4.2 get model
    model = get_net()
    model.cuda()
    if config.is_train_after_crash:
        best_model_name = config.weights + config.model_name + os.sep + str(
            fold - 10) + os.sep + "checkpoint.pth.tar"
        best_model = torch.load(best_model_name)
        print(best_model_name)
        model.load_state_dict(best_model["state_dict"])
        best_results = [np.inf, 0]
        val_metrics = [np.inf, 0]
        best_results[0] = best_model["best_loss"]
        best_results[1] = best_model["best_f1"]
    else:
        best_results = [np.inf, 0]
        val_metrics = [np.inf, 0]
    print(best_results)
    train_files = pd.read_csv(config.train_csv)
    external_files = pd.read_csv(config.external_csv)
    test_files = pd.read_csv(config.test_csv)
    all_files, test_files, weight_log = process_df(train_files, external_files,
                                                   test_files)
    # train_data_list,val_data_list = train_test_split(all_files,test_size = 0.13,random_state = 2050)
    train_data_list, val_data_list = tra_val_split(all_files)
    print(len(all_files))
    print(len(train_data_list))
    print(len(val_data_list))
    # train_data_list = train_data_list.iloc[np.arange(10000)]
    # val_data_list = val_data_list.iloc[np.arange(1000)]

    # load dataset
    train_gen = HumanDataset(train_data_list, mode="train")
    sampler = WeightedRandomSampler(
        train_data_list['freq'].values,
        num_samples=int(len(train_data_list) * config.multiply),
        replacement=True)
    train_loader = DataLoader(train_gen,
                              batch_size=config.batch_size,
                              drop_last=True,
                              sampler=sampler,
                              pin_memory=True,
                              num_workers=6)
    # train_loader = DataLoader(train_gen,batch_size=config.batch_size,shuffle=True,pin_memory=True,num_workers=6)

    val_gen = HumanDataset(val_data_list, augument=False, mode="train")
    val_loader = DataLoader(val_gen,
                            batch_size=config.batch_size,
                            drop_last=True,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=6)

    test_gen = HumanDataset(test_files, augument=False, mode="test")
    test_loader = DataLoader(test_gen,
                             1,
                             shuffle=False,
                             pin_memory=True,
                             num_workers=6)

    search_gen = HumanDataset(val_data_list, augument=False, mode="train")
    search_loader = DataLoader(search_gen,
                               batch_size=config.batch_size * 4,
                               drop_last=False,
                               shuffle=False,
                               pin_memory=True,
                               num_workers=6)

    # optimizer = optim.Adam(model.parameters(), lr=config.lr, weight_decay=1e-4, amsgrad=True)
    optimizer = optim.SGD(model.parameters(),
                          lr=config.lr,
                          momentum=0.9,
                          weight_decay=1e-4)
    criterion = nn.BCEWithLogitsLoss().cuda()
    # criterion = nn.BCEWithLogitsLoss(torch.from_numpy(process_loss_weight(weight_log)).float()).cuda()
    # scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.epochs, eta_min=4e-8)
    # scheduler = lr_scheduler.StepLR(optimizer,step_size=6,gamma=0.1)
    # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, patience=0, threshold=1e-3)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=[6, 13, 20],
                                         gamma=0.1)
    start = timer()

    # train
    if config.is_train:
        for epoch in range(0, config.epochs):
            scheduler.step(epoch)
            # train
            lr = get_learning_rate(optimizer)
            train_metrics = train(train_loader, model, criterion, optimizer,
                                  epoch, val_metrics, best_results, start, lr)
            # val
            val_metrics = evaluate(val_loader, model, criterion, epoch,
                                   train_metrics, best_results, start)
            # check results
            is_best_loss = val_metrics[0] < best_results[0]
            best_results[0] = min(val_metrics[0], best_results[0])
            is_best_f1 = val_metrics[1] > best_results[1]
            best_results[1] = max(val_metrics[1], best_results[1])
            # scheduler.step(val_metrics[0])
            # save model
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "model_name": config.model_name,
                    "state_dict": model.state_dict(),
                    "best_loss": best_results[0],
                    "optimizer": optimizer.state_dict(),
                    "fold": fold,
                    "best_f1": best_results[1],
                }, is_best_loss, is_best_f1, fold)
            # print logs
            print('\r', end='', flush=True)
            log.write('%s  %5.1f %6.1f         |         %0.3f  %0.3f           |         %0.3f  %0.4f         |         %s  %s    | %s' % (\
                    "best", epoch + 1, epoch + 1,
                    train_metrics[0], train_metrics[1],
                    val_metrics[0], val_metrics[1],
                    str(best_results[0])[:8],str(best_results[1])[:8],
                    time_to_str((timer() - start),'min'))
                )
            log.write("\n")
            time.sleep(0.01)

    if config.is_search_thres:
        best_model_name = "%s/%s_fold_%s_model_best_%s.pth.tar" % (
            config.best_models, config.model_name, str(fold), config.best)
        # best_model_name = config.weights + config.model_name + os.sep +str(fold) + os.sep + "checkpoint.pth.tar"
        print(best_model_name)
        best_model = torch.load(best_model_name)
        model.load_state_dict(best_model["state_dict"])
        search_thresholds(search_loader, model)

    if config.is_test:
        knums = config.threshold_factor
        for knum in knums:
            for f in range(5):
                best_model_name = "%s/%s_fold_%s_model_best_%s.pth.tar" % (
                    config.best_models, config.model_name, str(fold + f),
                    config.best)
                # best_model_name = config.weights + config.model_name + os.sep +str(fold) + os.sep + "checkpoint.pth.tar"
                print(best_model_name)
                best_model = torch.load(best_model_name)
                model.load_state_dict(best_model["state_dict"])
                test(test_loader, model, (fold + f), knum)
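# The WeightedRandomSampler above draws rows according to a 'freq' column that is
# never constructed in these snippets. A common recipe for multi-label data is to
# weight each row by the inverse count of its rarest label; a minimal sketch under
# that assumption (the weighting rule itself is hypothetical here):
import pandas as pd

def add_freq_column(df):
    # df['Target'] holds space-separated label ids, e.g. "7 1 2"
    labels = df['Target'].str.split(' ')
    counts = labels.explode().value_counts()
    out = df.copy()
    out['freq'] = labels.apply(lambda ls: 1.0 / min(counts[l] for l in ls))
    return out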
Example #9
def main():
    fold = 0
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep +
                          str(fold)):
        os.makedirs(config.weights + config.model_name + os.sep + str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")

    # 4.2 get model
    model = get_net()
    model.cuda()
    # load old weight trained model
    #model.load_state_dict(torch.load("{}/{}_fold_{}_model_best_loss.pth.tar".format(config.best_models,config.model_name,str(fold)))["state_dict"])

    start_epoch = 0
    best_loss = 999
    best_f1 = 0
    best_results = [np.inf, 0]
    val_metrics = [np.inf, 0]
    resume = False
    # get train
    # train data, this data include external data
    df1 = pd.read_csv(config.train_kaggle_csv)
    df2 = pd.read_csv(config.train_external_csv)
    all_files = pd.concat([df1, df2])

    # create duplicate for low data
    # https://www.kaggle.com/c/human-protein-atlas-image-classification/discussion/74374#437548
    train_df_orig = all_files.copy()
    lows = [
        15, 15, 15, 8, 9, 10, 8, 9, 10, 8, 9, 10, 17, 20, 24, 26, 15, 27, 15,
        20, 24, 17, 8, 15, 27, 27, 27
    ]
    for i in lows:
        target = str(i)
        indicies = train_df_orig.loc[train_df_orig['Target'] == target].index
        all_files = pd.concat([all_files, train_df_orig.loc[indicies]],
                              ignore_index=True)
        indicies = train_df_orig.loc[train_df_orig['Target'].str.startswith(
            target + " ")].index
        all_files = pd.concat([all_files, train_df_orig.loc[indicies]],
                              ignore_index=True)
        indicies = train_df_orig.loc[train_df_orig['Target'].str.endswith(
            " " + target)].index
        all_files = pd.concat([all_files, train_df_orig.loc[indicies]],
                              ignore_index=True)
        indicies = train_df_orig.loc[train_df_orig['Target'].str.contains(
            " " + target + " ")].index
        all_files = pd.concat([all_files, train_df_orig.loc[indicies]],
                              ignore_index=True)

    del df1, df2, train_df_orig
    gc.collect()

    # compute class weight
    target = all_files.apply(lambda x: x['Target'].split(' '), axis=1)
    y = target.tolist()
    y = MultiLabelBinarizer().fit_transform(y)
    labels_dict = dict()
    count_classes = np.sum(y, axis=0)
    for i, count in enumerate(count_classes):
        labels_dict[i] = count

    del target, y
    gc.collect()

    dampened_cw = create_class_weight(labels_dict)[1]
    tmp = list(dampened_cw.values())
    class_weight = torch.FloatTensor(tmp).cuda()

    # criterion
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.lr,
                                 weight_decay=config.weight_decay)
    criterion = nn.BCEWithLogitsLoss(weight=class_weight).cuda()

    #print(all_files)
    test_files = pd.read_csv(config.sample_submission)
    train_data_list, val_data_list = train_test_split(all_files,
                                                      test_size=0.13,
                                                      random_state=2050)

    # load dataset
    train_gen = HumanDataset(train_data_list, config.train_data, mode="train")
    train_loader = DataLoader(train_gen,
                              batch_size=config.batch_size,
                              shuffle=True,
                              pin_memory=True,
                              num_workers=4)

    val_gen = HumanDataset(val_data_list,
                           config.train_data,
                           augument=False,
                           mode="train")
    val_loader = DataLoader(val_gen,
                            batch_size=config.batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=4)

    test_gen = HumanDataset(test_files,
                            config.test_data,
                            augument=False,
                            mode="test")
    test_loader = DataLoader(test_gen,
                             1,
                             shuffle=False,
                             pin_memory=True,
                             num_workers=4)

    scheduler = lr_scheduler.StepLR(optimizer, step_size=8, gamma=0.1)
    start = timer()

    #train
    for epoch in range(0, config.epochs):
        scheduler.step(epoch)
        # train
        lr = get_learning_rate(optimizer)
        train_metrics = train(train_loader, model, criterion, optimizer, epoch,
                              val_metrics, best_results, start)
        # val
        val_metrics = evaluate(val_loader, model, criterion, epoch,
                               train_metrics, best_results, start)
        # check results
        is_best_loss = val_metrics[0] < best_results[0]
        best_results[0] = min(val_metrics[0], best_results[0])
        is_best_f1 = val_metrics[1] > best_results[1]
        best_results[1] = max(val_metrics[1], best_results[1])
        # save model
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "model_name": config.model_name,
                "state_dict": model.state_dict(),
                "best_loss": best_results[0],
                "optimizer": optimizer.state_dict(),
                "fold": fold,
                "best_f1": best_results[1],
            }, is_best_loss, is_best_f1, fold)
        # print logs
        print('\r', end='', flush=True)
        log.write('%s  %5.1f %6.1f         |         %0.3f  %0.3f           |         %0.3f  %0.4f         |         %s  %s    | %s' % (\
                "best", epoch, epoch,
                train_metrics[0], train_metrics[1],
                val_metrics[0], val_metrics[1],
                str(best_results[0])[:8], str(best_results[1])[:8],
                time_to_str((timer() - start), 'min'))
            )
        log.write("\n")
        time.sleep(0.01)

    best_model = torch.load("{}/{}_fold_{}_model_best_loss.pth.tar".format(
        config.best_models, config.model_name, str(fold)))
    #best_model = torch.load("checkpoints/bninception_bcelog/0/checkpoint.pth.tar")
    model.load_state_dict(best_model["state_dict"])
    test(test_loader, model, fold)
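# create_class_weight() is referenced above but not defined in these snippets.
# The usual log-dampened class weighting looks like the sketch below; the code
# above takes element [1] of the result, so the dampened weights are returned
# second. This is an assumption about the project's version; mu is a smoothing
# constant and its value here is illustrative.
import math

def create_class_weight_sketch(labels_dict, mu=0.5):
    total = sum(labels_dict.values())
    dampened = {cls: max(1.0, math.log(mu * total / count))
                for cls, count in labels_dict.items()}
    return labels_dict, dampened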