Example #1
def main(fold):
    mdl_path = "%s/%s_fold_%s_model_best_loss.pth.tar" % (config.best_models, config.model_name, str(fold))
    args = get_args(mdl_path)
    model = MultiModalNet("se_resnext101_32x4d", "dpn26", 0.5)
    model_dict = torch.load(args.model_path)
    model.load_state_dict(model_dict['state_dict'])

    model.to(device)
    model.eval()

    test_files = pd.read_csv("./test.csv")
    test_gen = MultiModalDataset(test_files, config.test_data, config.test_vis, augument=False, mode="test")
    test_loader = DataLoader(test_gen, 1, shuffle=False, pin_memory=True, num_workers=1)
    test(test_loader, model, fold)
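get_args() used above is not shown in any of these examples; judging from its use, it only needs to expose the checkpoint path as args.model_path. A minimal stand-in under that assumption (the real helper presumably also parses command-line options):

from argparse import Namespace

def get_args(mdl_path):
    # sketch only: expose the single attribute the snippet actually reads
    return Namespace(model_path=mdl_path)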
Example #2
def main():
    fold = 0
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep +
                          str(fold)):
        os.makedirs(config.weights + config.model_name + os.sep + str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")

    #4.2 get model
    model = MultiModalNet("dpn26", 0.5)

    #4.3 optim & criterion
    optimizer = optim.SGD(model.parameters(),
                          lr=config.lr,
                          momentum=0.9,
                          weight_decay=1e-4)
    # optimizer = torch.optim.Adam(model.parameters(), lr = config.lr)  # define the optimizer
    criterion = nn.CrossEntropyLoss().to(device)

    start_epoch = 0
    best_acc = 0
    best_loss = np.inf
    best_f1 = 0
    best_results = [0, np.inf, 0]
    val_metrics = [0, np.inf, 0]
    resume = False
    if resume:
        checkpoint = torch.load(
            r'./checkpoints/best_models/seresnext101_dpn92_defrog_multimodal_fold_0_model_best_loss.pth.tar'
        )
        best_acc = checkpoint['best_acc']
        best_loss = checkpoint['best_loss']
        best_f1 = checkpoint['best_f1']
        start_epoch = checkpoint['epoch']

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.to(device)

    all_files = pd.read_csv("./train.csv")
    test_files = pd.read_csv("./test.csv")
    train_data_list, val_data_list = train_test_split(
        all_files, test_size=0.1, random_state=2050)  # split the full training set into train and valid subsets

    # load dataset
    train_gen = MultiModalDataset(train_data_list,
                                  config.train_data,
                                  config.train_vis,
                                  mode="train")
    train_loader = DataLoader(
        train_gen,
        batch_size=config.batch_size,
        shuffle=True,
        pin_memory=True,
        num_workers=1)  #num_worker is limited by shared memory in Docker!

    val_gen = MultiModalDataset(val_data_list,
                                config.train_data,
                                config.train_vis,
                                augument=False,
                                mode="train")
    val_loader = DataLoader(val_gen,
                            batch_size=config.batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=1)

    test_gen = MultiModalDataset(test_files,
                                 config.test_data,
                                 config.test_vis,
                                 augument=False,
                                 mode="test")
    test_loader = DataLoader(test_gen,
                             1,
                             shuffle=False,
                             pin_memory=True,
                             num_workers=1)

    #scheduler = lr_scheduler.StepLR(optimizer,step_size=10,gamma=0.1)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)
    #n_batches = int(len(train_loader.dataset) // train_loader.batch_size)
    #scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*2)
    start = timer()

    # per-epoch bookkeeping; the hard-coded 30 presumably corresponds to config.epochs
    arr_loss = np.zeros((30, len(train_loader)))
    val_accuracy = np.zeros((30, 1))

    #train
    for epoch in range(0, config.epochs):
        # train
        train_metrics = train(train_loader, model, criterion, optimizer, epoch,
                              val_metrics, best_results, start)
        # val
        val_metrics = evaluate(val_loader, model, criterion, epoch,
                               train_metrics, best_results, start,
                               val_accuracy)
        # ReduceLROnPlateau expects the monitored metric (here: validation loss),
        # not the epoch index
        scheduler.step(val_metrics[1])
        # check results
        is_best_acc = val_metrics[0] > best_results[0]
        best_results[0] = max(val_metrics[0], best_results[0])
        is_best_loss = val_metrics[1] < best_results[1]
        best_results[1] = min(val_metrics[1], best_results[1])
        is_best_f1 = val_metrics[2] > best_results[2]
        best_results[2] = max(val_metrics[2], best_results[2])
        # save model
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "model_name": config.model_name,
                "state_dict": model.state_dict(),
                "best_acc": best_results[0],
                "best_loss": best_results[1],
                "optimizer": optimizer.state_dict(),
                "fold": fold,
                "best_f1": best_results[2],
            }, is_best_acc, is_best_loss, is_best_f1, fold)
        # print logs
        print('\r', end='', flush=True)
        log.write('%s  %5.1f %6.1f      |   %0.3f   %0.3f   %0.3f     |  %0.3f   %0.3f    %0.3f    |   %s  %s  %s | %s' % (\
                "best", epoch, epoch,
                train_metrics[0], train_metrics[1],train_metrics[2],
                val_metrics[0],val_metrics[1],val_metrics[2],
                str(best_results[0])[:8],str(best_results[1])[:8],str(best_results[2])[:8],
                time_to_str((timer() - start),'min'))
            )
        log.write("\n")
        time.sleep(0.01)

    best_model = torch.load("%s/%s_fold_%s_model_best_acc.pth.tar" %
                            (config.best_models, config.model_name, str(fold)))
    model.load_state_dict(best_model["state_dict"])
    test(test_loader, model, fold)
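The test() routine invoked at the end of both mains above is defined elsewhere in the repository. A hedged sketch of such an inference-and-submission step, reusing the module-level names the snippets already assume (torch, F = torch.nn.functional, pd, np, config, device); the yielded tuple layout, column names and output file name are assumptions, not taken from the original:

def test(test_loader, model, fold):
    # sketch only -- not the repository's actual implementation
    model.eval()
    ids, preds = [], []
    with torch.no_grad():
        # assumption: in mode="test" the dataset yields (image, visit, filename)
        for images, visit, filepath in test_loader:
            image_var = images.to(device)
            visit = visit.to(device)
            prob = F.softmax(model(image_var, visit), dim=1)
            preds.append(prob.argmax(dim=1).cpu().numpy())
            ids.extend(filepath)
    sub = pd.DataFrame({"Id": ids, "Target": np.concatenate(preds)})
    sub.to_csv("%s/%s_fold_%s_submission.csv" %
               (config.submit, config.model_name, fold), index=False)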
Example #3
def main():
    fold = 0  #meaningless
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    # if not os.path.exists(config.weights + config.model_name + os.sep +str(fold)):#checkpoints
    # os.makedirs(config.weights + config.model_name + os.sep +str(fold))
    # if not os.path.exists(config.best_models):#best model
    # os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")

    #4.2 get model
    basenet = "se_resnext50_32x4d"
    model = MultiModalNet(basenet, "dpn26", 0.5)  #drop out = 0.5

    #4.3 optim & criterion
    optimizer = optim.SGD(model.parameters(),
                          lr=config.lr,
                          momentum=0.9,
                          weight_decay=1e-4)  # weight_decay acts as L2 regularization
    criterion = nn.CrossEntropyLoss().to(device)

    start_epoch = 0
    best_acc = 0
    best_loss = np.inf
    best_f1 = 0
    best_results = [0, np.inf, 0]
    val_metrics = [0, np.inf, 0]

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.to(device)

    all_files = pd.read_csv("./train.csv")
    test_files = pd.read_csv("./test.csv")
    train_data_list, val_data_list = train_test_split(all_files,
                                                      test_size=0.1,
                                                      random_state=1996)

    # load dataset
    train_gen = MultiModalDataset(train_data_list,
                                  config.train_data,
                                  config.train_vis,
                                  mode="train")
    train_loader = DataLoader(
        train_gen,
        batch_size=config.batch_size,
        shuffle=True,
        pin_memory=True,
        num_workers=1)  #num_worker is limited by shared memory in Docker!

    val_gen = MultiModalDataset(val_data_list,
                                config.train_data,
                                config.train_vis,
                                augument=False,
                                mode="train")
    val_loader = DataLoader(val_gen,
                            batch_size=config.batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=1)

    test_gen = MultiModalDataset(test_files,
                                 config.test_data,
                                 config.test_vis,
                                 augument=False,
                                 mode="test")
    test_loader = DataLoader(test_gen,
                             1,
                             shuffle=False,
                             pin_memory=True,
                             num_workers=1)

    #scheduler = lr_scheduler.StepLR(optimizer,step_size=10,gamma=0.1)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)
    #n_batches = int(len(train_loader.dataset) // train_loader.batch_size)
    #scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*2)
    start = timer()

    #model.load_state_dict(torch.load('checkpoints/se_resnext50_32x4d_fold_0_checkpoint.pth')['state_dict'])

    for epoch in range(0, config.epochs):
        # train
        train_metrics = train(train_loader, model, criterion, optimizer, epoch,
                              val_metrics, best_results, start)
        # val
        val_metrics = evaluate(val_loader, model, criterion, epoch,
                               train_metrics, best_results, start)
        # step ReduceLROnPlateau on the monitored metric (validation loss),
        # not the epoch index
        scheduler.step(val_metrics[1])
        # check results
        is_best_acc = val_metrics[0] > best_results[0]
        best_results[0] = max(val_metrics[0], best_results[0])
        is_best_loss = val_metrics[1] < best_results[1]
        best_results[1] = min(val_metrics[1], best_results[1])
        is_best_f1 = val_metrics[2] > best_results[2]
        best_results[2] = max(val_metrics[2], best_results[2])
        # save model
        # save a separate checkpoint only when best_acc / best_loss / best_f1 improves
        save_checkpoint(
            {
                "basenet": basenet,
                "epoch": epoch + 1,
                "model_name": config.model_name,
                "state_dict": model.state_dict(),
                "best_acc": best_results[0],
                "best_loss": best_results[1],
                "optimizer": optimizer.state_dict(),
                "fold": fold,
                "best_f1": best_results[2],
            }, is_best_acc, is_best_loss, is_best_f1, fold)
        # print logs
        print('\r', end='', flush=True)
        log.write('%s  %5.1f %6.1f      |   %0.3f   %0.3f   %0.3f     |  %0.3f   %0.3f    %0.3f    |   %s  %s  %s | %s' % (\
          "best", epoch, epoch,
          train_metrics[0], train_metrics[1],train_metrics[2],
          val_metrics[0],val_metrics[1],val_metrics[2],
          str(best_results[0])[:8],str(best_results[1])[:8],str(best_results[2])[:8],
          time_to_str((timer() - start),'min'))
         )
        log.write("\n")
        time.sleep(0.01)
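save_checkpoint() is also defined elsewhere. Judging from the call sites and from the file names torch.load() reads back (e.g. ..._model_best_acc.pth.tar under config.best_models), it presumably writes a running checkpoint and copies it whenever one of the three flags is set. A sketch under those assumptions only (the running-checkpoint path is a placeholder):

import shutil

def save_checkpoint(state, is_best_acc, is_best_loss, is_best_f1, fold):
    # sketch only: running checkpoint path is assumed; the best_* names match
    # the paths that torch.load() uses in the snippets above
    filename = "%s%s%s%s/checkpoint.pth.tar" % (config.weights, config.model_name,
                                                os.sep, fold)
    torch.save(state, filename)
    for flag, tag in ((is_best_acc, "acc"), (is_best_loss, "loss"), (is_best_f1, "f1")):
        if flag:
            shutil.copyfile(filename, "%s/%s_fold_%s_model_best_%s.pth.tar"
                            % (config.best_models, config.model_name, fold, tag))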
Example #4
model1 = MultiModalNet('se_resnext101_32x4d', 'DPN26', 0.5)  # mirrors the model2 block below; the start of this snippet is cut off
checkpoint1 = torch.load('checkpoints/se_resnext101_32x4d_fold_1_checkpoint.pth')  # placeholder path, not shown in the original
new_state_dict = OrderedDict()
for k, v in checkpoint1['state_dict'].items():
    name = k[7:]  # remove the "module." prefix added by nn.DataParallel
    new_state_dict[name] = v
model1.load_state_dict(new_state_dict)

model2 = MultiModalNet('se_resnext101_32x4d', 'DPN26', 0.5)
checkpoint2 = torch.load(
    'checkpoints/se_resnext101_32x4d_fold_0_checkpoint.pth')
new_state_dict = OrderedDict()
for k, v in checkpoint2['state_dict'].items():
    name = k[7:]  # remove module.
    new_state_dict[name] = v
model2.load_state_dict(new_state_dict)

# if torch.cuda.device_count() > 1:
model1 = nn.DataParallel(model1)
model2 = nn.DataParallel(model2)
model1.to(device)
model2.to(device)
model1.eval()
model2.eval()
torch.backends.cudnn.benchmark = True

test_files = pd.read_csv("./test.csv")
test_gen = MultiModalDataset(test_files,
                             config.test_data,
                             config.test_vis,
                             augument=False,
                             mode="test")
test_loader = DataLoader(test_gen,
                         batch_size=1,
                         shuffle=False,
                         pin_memory=True)
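Example #4 breaks off before the actual prediction step. A sketch of how the two models' outputs could be combined, assuming equal-weight averaging of softmax probabilities, the same (image, visit, filename) test batches as in the other sketches, and F = torch.nn.functional:

# sketch only -- averaging scheme and batch layout are assumptions
preds, ids = [], []
with torch.no_grad():
    for images, visit, filepath in test_loader:
        image_var = images.to(device)
        visit = visit.to(device)
        p = (F.softmax(model1(image_var, visit), dim=1) +
             F.softmax(model2(image_var, visit), dim=1)) / 2.0   # equal-weight ensemble
        preds.append(p.argmax(dim=1).cpu().numpy())
        ids.extend(filepath)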
Example #5
def main():
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    for fold in range(config.FOLD):
        if not os.path.exists(config.weights + config.model_name + os.sep +
                              str(fold)):
            os.makedirs(config.weights + config.model_name + os.sep +
                        str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")
    # with open('../data/train_lgb.pkl', 'rb') as f:
    #     magic_trains = pickle.load(f)
    # with open('../data/test_lgb.pkl', 'rb') as f:
    #     magic_tests = pickle.load(f)
    # resume = False
    # if resume:
    #     checkpoint = torch.load(r'./checkpoints/best_models/seresnext101_dpn92_defrog_multimodal_fold_0_model_best_loss.pth.tar')
    #     best_acc = checkpoint['best_acc']
    #     best_loss = checkpoint['best_loss']
    #     best_f1 = checkpoint['best_f1']
    #     start_epoch = checkpoint['epoch']

    start = timer()
    # from torchsummary import summary
    # print(summary(model, [(3, 100, 100), (7*26, 24)]))
    all_files = pd.read_csv("../data/train.csv")
    all_files = all_files.sample(frac=1, random_state=666)
    test_files = pd.read_csv("../data/test.csv")
    max_epoch = config.epochs
    if config.debug:
        all_files = all_files.iloc[:1000]
        test_files = test_files.iloc[:100]
        config.batch_size = 2
        max_epoch = 1
    train_label = np.array(all_files['Target'])
    if config.OOF:
        result = np.zeros((len(all_files), 9))
        # print(result.shape)
        # rows were already shuffled above with a fixed seed; random_state has no
        # effect (and newer scikit-learn rejects it) when shuffle=False
        skf = StratifiedKFold(n_splits=config.FOLD, shuffle=False)
        for fold, (train_idx,
                   val_idx) in enumerate(skf.split(all_files, train_label)):
            print('fold:', fold)
            val_data_list = all_files.iloc[val_idx]

            # load dataset
            val_gen = MultiModalDataset(val_data_list,
                                        config.train_data,
                                        config.train_vis,
                                        augument=False,
                                        mode="train")
            val_loader = DataLoader(val_gen,
                                    batch_size=config.batch_size,
                                    shuffle=False,
                                    pin_memory=True,
                                    num_workers=1)

            best_model = torch.load(
                "%s/%s_fold_%s_model_best_acc.pth.tar" %
                (config.best_models, config.model_name, str(fold)))
            model = MultiModalNet(drop=0.5)
            if torch.cuda.device_count() > 1:
                model = nn.DataParallel(model)
            model.to(device)
            model.eval()
            model.load_state_dict(best_model["state_dict"])
            result_oof = []
            with torch.no_grad():
                for i, (images, (visit, ),
                        target) in tqdm(enumerate(val_loader)):

                    image_var = images.to(device)
                    # print(image_var.shape)
                    # magic = magic.to(device)
                    visit = visit.to(device)
                    indx_target = target.clone()
                    target = torch.from_numpy(
                        np.array(target)).float().to(device)
                    y_oof = F.softmax(model(image_var, visit),
                                      dim=1).cpu().numpy()
                    # print(y_oof.shape)
                    result_oof.extend(y_oof)
            result_oof = np.array(result_oof)
            print(len(val_idx), result_oof.shape)
            result[val_idx] = result_oof
        print(result.shape)
        with open("../data/oof2.pkl", 'wb') as f:
            pickle.dump(result, f)

    if config.train and config.FOLD > 1:
        # train_data_list,val_data_list = train_test_split(all_files, test_size=0.1, random_state = 2050)
        # rows were already shuffled above with a fixed seed; random_state has no
        # effect (and newer scikit-learn rejects it) when shuffle=False
        skf = StratifiedKFold(n_splits=config.FOLD, shuffle=False)
        for fold, (train_idx,
                   val_idx) in enumerate(skf.split(all_files, train_label)):
            print('fold:', fold)
            train_data_list = all_files.iloc[train_idx]
            val_data_list = all_files.iloc[val_idx]
            # train_magic = magic_trains.iloc[train_idx]
            # val_magic = magic_trains.iloc[val_idx]
            # load dataset
            train_gen = MultiModalDataset(train_data_list,
                                          config.train_data,
                                          config.train_vis,
                                          mode="train")
            train_loader = DataLoader(
                train_gen,
                batch_size=config.batch_size,
                shuffle=True,
                pin_memory=True,
                num_workers=1
            )  #num_worker is limited by shared memory in Docker!

            val_gen = MultiModalDataset(val_data_list,
                                        config.train_data,
                                        config.train_vis,
                                        augument=False,
                                        mode="train")
            val_loader = DataLoader(val_gen,
                                    batch_size=config.batch_size,
                                    shuffle=False,
                                    pin_memory=True,
                                    num_workers=1)

            start_epoch = 0
            best_acc = 0
            best_loss = np.inf
            best_f1 = 0
            best_results = [0, np.inf, 0]
            val_metrics = [0, np.inf, 0]
            #model
            # 4.2 get model
            model = MultiModalNet(drop=0.5)
            if fold == 0:
                total_num = sum(p.numel() for p in model.parameters())
                trainable_num = sum(p.numel() for p in model.parameters()
                                    if p.requires_grad)
                print('Total', total_num, 'Trainable', trainable_num)
            # 4.3 optim & criterion
            optimizer = Nadam(model.parameters(), lr=5e-4)
            #torch.optim.Adamax(model.parameters(), 0.001)
            # optimizer = optim.SGD(model.parameters(),lr = config.lr,momentum=0.9,weight_decay=1e-4)
            criterion = nn.CrossEntropyLoss().to(device)
            # scheduler = lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.25)
            scheduler = lr_scheduler.MultiStepLR(optimizer, [6, 12, 18],
                                                 gamma=0.5)
            # lr_scheduler.ReduceLROnPlateau(optimizer)
            # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[4, 8, 12], gamma=0.25)
            # n_batches = int(len(train_loader.dataset) // train_loader.batch_size)
            # scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*2)
            if torch.cuda.device_count() > 1:
                model = nn.DataParallel(model)
            model.to(device)

            #train
            best_acc_epoch = 0
            for epoch in range(0, max_epoch):
                if epoch - best_acc_epoch > 5:
                    break
                scheduler.step(epoch)
                # train
                # train_metrics = None
                train_metrics = train(train_loader, model, criterion,
                                      optimizer, epoch, val_metrics,
                                      best_results, start)
                # val
                val_metrics = evaluate(val_loader, model, criterion, epoch,
                                       train_metrics, best_results, start)
                # check results
                is_best_acc = val_metrics[0] > best_results[0]
                if is_best_acc:
                    best_acc_epoch = epoch
                best_results[0] = max(val_metrics[0], best_results[0])
                is_best_loss = val_metrics[1] < best_results[1]
                best_results[1] = min(val_metrics[1], best_results[1])
                is_best_f1 = val_metrics[2] > best_results[2]
                best_results[2] = max(val_metrics[2], best_results[2])
                # save model
                save_checkpoint(
                    {
                        "epoch": epoch + 1,
                        "model_name": config.model_name,
                        "state_dict": model.state_dict(),
                        "best_acc": best_results[0],
                        "best_loss": best_results[1],
                        "optimizer": optimizer.state_dict(),
                        "fold": fold,
                        "best_f1": best_results[2],
                    }, is_best_acc, is_best_loss, is_best_f1, fold)
                # print logs
                print('\r', end='', flush=True)
                log.write('%s  %5.1f %6.1f      |   %0.3f   %0.3f   %0.3f     |  %0.3f   %0.3f    %0.3f    |   %s  %s  %s | %s' % (\
                        "best", epoch, epoch,
                        train_metrics[0], train_metrics[1],train_metrics[2],
                        val_metrics[0],val_metrics[1],val_metrics[2],
                        str(best_results[0])[:8],str(best_results[1])[:8],str(best_results[2])[:8],
                        time_to_str((timer() - start),'min'))
                    )
                log.write("\n")
                time.sleep(0.01)
    if config.train and config.FOLD == 1:
        # NOTE: magic_trains is the `train_lgb.pkl` data loaded near the top of
        # main(), which is currently commented out and must be re-enabled before
        # this branch can run
        train_data_list, val_data_list, train_magic, val_magic = train_test_split(
            all_files, magic_trains, test_size=0.1, random_state=2050)
        # skf = StratifiedKFold(n_splits=config.FOLD, random_state=2019, shuffle=False)
        # for fold, (train_idx, val_idx) in enumerate(skf.split(all_files, train_label)):
        #     print('fold:', fold)
        #     train_data_list = all_files.iloc[train_idx]
        #     val_data_list = all_files.iloc[val_idx]
        # load dataset
        train_gen = MultiModalDataset(train_data_list,
                                      train_magic,
                                      config.train_data,
                                      config.train_vis,
                                      mode="train")
        train_loader = DataLoader(
            train_gen,
            batch_size=config.batch_size,
            shuffle=True,
            pin_memory=True,
            num_workers=1)  #num_worker is limited by shared memory in Docker!

        val_gen = MultiModalDataset(val_data_list,
                                    val_magic,
                                    config.train_data,
                                    config.train_vis,
                                    augument=False,
                                    mode="train")
        val_loader = DataLoader(val_gen,
                                batch_size=config.batch_size,
                                shuffle=False,
                                pin_memory=True,
                                num_workers=1)

        start_epoch = 0
        best_acc = 0
        best_loss = np.inf
        best_f1 = 0
        best_results = [0, np.inf, 0]
        val_metrics = [0, np.inf, 0]
        #model
        # 4.2 get model
        model = MultiModalNet(drop=0.5)
        # 4.3 optim & criterion
        optimizer = torch.optim.Adamax(model.parameters(), 0.001)
        # optimizer = optim.SGD(model.parameters(),lr = config.lr,momentum=0.9,weight_decay=1e-4)
        criterion = nn.CrossEntropyLoss().to(device)
        # scheduler = lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.25)
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)
        # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[4, 8, 12], gamma=0.25)
        # n_batches = int(len(train_loader.dataset) // train_loader.batch_size)
        # scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*2)
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        model.to(device)

        #train
        best_acc_epoch = 0
        for epoch in range(0, max_epoch):
            if epoch - best_acc_epoch > 5:  # early stop: no accuracy gain for 5 epochs
                break
            # train
            train_metrics = train(train_loader, model, criterion, optimizer,
                                  epoch, val_metrics, best_results, start)
            # val
            val_metrics = evaluate(val_loader, model, criterion, epoch,
                                   train_metrics, best_results, start)
            # step ReduceLROnPlateau on the monitored metric (validation loss)
            scheduler.step(val_metrics[1])
            # check results
            is_best_acc = val_metrics[0] > best_results[0]
            if is_best_acc:
                best_acc_epoch = epoch
            best_results[0] = max(val_metrics[0], best_results[0])
            is_best_loss = val_metrics[1] < best_results[1]
            best_results[1] = min(val_metrics[1], best_results[1])
            is_best_f1 = val_metrics[2] > best_results[2]
            best_results[2] = max(val_metrics[2], best_results[2])
            # save model
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "model_name": config.model_name,
                    "state_dict": model.state_dict(),
                    "best_acc": best_results[0],
                    "best_loss": best_results[1],
                    "optimizer": optimizer.state_dict(),
                    "fold": fold,
                    "best_f1": best_results[2],
                }, is_best_acc, is_best_loss, is_best_f1, fold)
            # print logs
            print('\r', end='', flush=True)
            log.write('%s  %5.1f %6.1f      |   %0.3f   %0.3f   %0.3f     |  %0.3f   %0.3f    %0.3f    |   %s  %s  %s | %s' % (\
                    "best", epoch, epoch,
                    train_metrics[0], train_metrics[1],train_metrics[2],
                    val_metrics[0],val_metrics[1],val_metrics[2],
                    str(best_results[0])[:8],str(best_results[1])[:8],str(best_results[2])[:8],
                    time_to_str((timer() - start),'min'))
                )
            log.write("\n")
            time.sleep(0.01)
    if config.predict:
        # test data
        models = []
        for fold in range(5):
            best_model = torch.load(
                "%s/%s_fold_%s_model_best_acc.pth.tar" %
                (config.best_models, config.model_name, str(fold)))
            model = MultiModalNet(drop=0.5)
            if torch.cuda.device_count() > 1:
                model = nn.DataParallel(model)
            model.to(device)
            model.eval()
            model.load_state_dict(best_model["state_dict"])
            models.append(model)
        test_gen = MultiModalDataset(test_files,
                                     config.test_data,
                                     config.test_vis,
                                     augument=False,
                                     mode="test",
                                     TTA=True)
        test_loader = DataLoader(test_gen,
                                 batch_size=config.batch_size,
                                 shuffle=False,
                                 pin_memory=True,
                                 num_workers=1)
        # predict
        test(test_loader, models)
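None of the snippets show the config object they all read from. A minimal stand-in listing only the attributes referenced above; every value is a placeholder, only the attribute names come from the code:

class Config:
    # paths (placeholder values)
    train_data = "./data/train_image/"
    train_vis = "./data/train_visit/"
    test_data = "./data/test_image/"
    test_vis = "./data/test_visit/"
    weights = "./checkpoints/"
    best_models = "./checkpoints/best_models/"
    submit = "./submit/"
    # model / training
    model_name = "multimodal"
    batch_size = 32
    lr = 0.01
    epochs = 30
    # flow control used in Example #5
    FOLD = 5
    debug = False
    OOF = False
    train = True
    predict = True

config = Config()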
Example #6
def main():
    fold = 0
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep +
                          str(fold)):
        os.makedirs(config.weights + config.model_name + os.sep + str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")

    #4.2 get model
    # model=MultiModalNet("se_resnext101_32x4d","dpn107",0.5)
    model = MultiModalNet("se_resnext50_32x4d", "dpn26", 0.5)

    #4.3 optim & criterion
    #optimizer = optim.SGD(model.parameters(),lr = config.lr,momentum=0.9,weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss().to(device)  # multi-class classification, so cross-entropy loss is chosen

    # optimizer = optim.SGD([{'params': model.base.parameters()},
    #                        {'params': model.classifier.parameters(), 'lr': config.lr*0.1}], lr=1e-5,momentum=0.9,weight_decay=1e-4)
    optimizer = optim.Adam(
        model.parameters(),
        lr=config.lr,
        betas=(0.9, 0.999),
        weight_decay=1e-4)  # eps left at its default of 1e-08

    # class SGD(Optimizer):
    #     def __init__(self, params, lr=required, momentum=0, dampening=0, weight_decay1=0, weight_decay2=0, nesterov=False):
    #         defaults = dict(lr=lr, momentum=momentum, dampening=dampening,
    #                         weight_decay1=weight_decay1, weight_decay2=weight_decay2, nesterov=nesterov)
    #         if nesterov and (momentum <= 0 or dampening != 0):
    #             raise ValueError("Nesterov momentum requires a momentum and zero dampening")
    #         super(SGD, self).__init__(params, defaults)

    #     def __setstate__(self, state):
    #         super(SGD, self).__setstate__(state)
    #         for group in self.param_groups:
    #             group.setdefault('nesterov', False)

    #     def step(self, closure=None):
    #         """Performs a single optimization step. Arguments: closure (callable, optional): A closure that reevaluates the model and returns the loss. """
    #         loss = None
    #         if closure is not None:
    #             loss = closure()

    #         for group in self.param_groups:
    #             weight_decay1 = group['weight_decay1']
    #             weight_decay2 = group['weight_decay2']
    #             momentum = group['momentum']
    #             dampening = group['dampening']
    #             nesterov = group['nesterov']

    #             for p in group['params']:
    #                 if p.grad is None:
    #                     continue
    #                 d_p = p.grad.data
    #                 if weight_decay1 != 0:
    #                     d_p.add_(weight_decay1, torch.sign(p.data))
    #                 if weight_decay2 != 0:
    #                     d_p.add_(weight_decay2, p.data)
    #                 if momentum != 0:
    #                     param_state = self.state[p]
    #                     if 'momentum_buffer' not in param_state:
    #                         buf = param_state['momentum_buffer'] = torch.zeros_like(p.data)
    #                         buf.mul_(momentum).add_(d_p)
    #                     else:
    #                         buf = param_state['momentum_buffer']
    #                         buf.mul_(momentum).add_(1 - dampening, d_p)
    #                     if nesterov:
    #                         d_p = d_p.add(momentum, buf)
    #                     else:
    #                         d_p = buf

    #                 p.data.add_(-group['lr'], d_p)

    #         return loss

    start_epoch = 0
    best_acc = 0
    best_loss = np.inf
    best_f1 = 0
    best_results = [0, np.inf, 0]
    val_metrics = [0, np.inf, 0]
    resume = False
    if resume:
        checkpoint = torch.load(
            r'./checkpoints/best_models/seresnext101_dpn107_defrog_multimodal_fold_0_model_best_loss.pth.tar'
        )
        best_acc = checkpoint['best_acc']
        best_loss = checkpoint['best_loss']
        best_f1 = checkpoint['best_f1']
        start_epoch = checkpoint['epoch']

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.to(device)

    all_files = pd.read_csv("./train.csv")
    test_files = pd.read_csv("./test.csv")
    train_data_list, val_data_list = train_test_split(all_files,
                                                      test_size=0.1,
                                                      random_state=2050)

    # load dataset
    train_gen = MultiModalDataset(train_data_list,
                                  config.train_data,
                                  config.train_vis,
                                  mode="train")
    train_loader = DataLoader(
        train_gen,
        batch_size=config.batch_size,
        shuffle=True,
        pin_memory=True,
        num_workers=1)  #num_worker is limited by shared memory in Docker!

    val_gen = MultiModalDataset(val_data_list,
                                config.train_data,
                                config.train_vis,
                                augument=False,
                                mode="train")
    val_loader = DataLoader(val_gen,
                            batch_size=config.batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=1)

    test_gen = MultiModalDataset(test_files,
                                 config.test_data,
                                 config.test_vis,
                                 augument=False,
                                 mode="test")
    test_loader = DataLoader(test_gen,
                             1,
                             shuffle=False,
                             pin_memory=True,
                             num_workers=1)

    #scheduler = lr_scheduler.StepLR(optimizer,step_size=10,gamma=0.1,last_epoch = -1)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)
    #n_batches = int(len(train_loader.dataset) // train_loader.batch_size)
    #scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*2)
    start = timer()

    #train
    for epoch in range(0, config.epochs):
        # train
        train_metrics = train(train_loader, model, criterion, optimizer, epoch,
                              val_metrics, best_results, start)
        # val
        val_metrics = evaluate(val_loader, model, criterion, epoch,
                               train_metrics, best_results, start)
        # step ReduceLROnPlateau on the monitored metric (validation loss),
        # not the epoch index
        scheduler.step(val_metrics[1])
        # check results
        is_best_acc = val_metrics[0] > best_results[0]
        best_results[0] = max(val_metrics[0], best_results[0])
        is_best_loss = val_metrics[1] < best_results[1]
        best_results[1] = min(val_metrics[1], best_results[1])
        is_best_f1 = val_metrics[2] > best_results[2]
        best_results[2] = max(val_metrics[2], best_results[2])
        # save model
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "model_name": config.model_name,
                "state_dict": model.state_dict(),
                "best_acc": best_results[0],
                "best_loss": best_results[1],
                "optimizer": optimizer.state_dict(),
                "fold": fold,
                "best_f1": best_results[2],
            }, is_best_acc, is_best_loss, is_best_f1, fold)
        # print logs
        print('\r', end='', flush=True)
        log.write('%s  %5.1f %6.1f      |   %0.3f   %0.3f   %0.3f     |  %0.3f   %0.3f    %0.3f    |   %s  %s  %s | %s' % (\
                "best", epoch, epoch,
                train_metrics[0], train_metrics[1],train_metrics[2],
                val_metrics[0],val_metrics[1],val_metrics[2],
                str(best_results[0])[:8],str(best_results[1])[:8],str(best_results[2])[:8],
                time_to_str((timer() - start),'min'))
            )
        log.write("\n")
        time.sleep(0.01)

    best_model = torch.load("%s/%s_fold_%s_model_best_loss.pth.tar" %
                            (config.best_models, config.model_name, str(fold)))
    model.load_state_dict(best_model["state_dict"])
    test(test_loader, model, fold)
def main():
    fold = 0
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep +str(fold)):
        os.makedirs(config.weights + config.model_name + os.sep +str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")
    
    #4.2 get model
    model = MultiModalNet("se_resnext50_32x4d","dpn26",0.5)  #se_resnext101_32x4d

    #4.3 optim & criterion
    optimizer = optim.SGD(model.parameters(),lr = config.lr,momentum=0.9,weight_decay=1e-4)
    # criterion = FocalLoss(alpha=[1,1,1,1,1,1,1,1,1]).to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    start_epoch = 0
    best_acc=0
    best_loss = np.inf
    best_f1 = 0
    best_results = [0,np.inf,0]
    val_metrics = [0,np.inf,0]
    resume = False
    if resume:
        checkpoint_path = r'./checkpoints/best_models/multimodal_fold_0_model_best_loss.pth.tar'
        if not os.path.isfile(checkpoint_path):
            raise RuntimeError("=> no checkpoint found at '{}'".format(checkpoint_path))
        checkpoint = torch.load(checkpoint_path,map_location=device)
        best_acc = checkpoint['best_acc']
        best_loss = checkpoint['best_loss']
        best_f1 = checkpoint['best_f1']
        start_epoch = checkpoint['epoch']

        #args.cuda
        # if torch.cuda.is_available():
        #     model.module.load_state_dict(checkpoint['state_dict'])
        # else:
        #     model.load_state_dict(checkpoint['state_dict'])
        model.load_state_dict(checkpoint['state_dict'])
        # ft = True
        # if ft:
        #     optimizer.load_state_dict(checkpoint['optimizer'])

    # # Clear start epoch if fine-tuning
    # if args.ft:
    #     args.start_epoch = 0

    multi_gpu = False
    if torch.cuda.device_count() > 1 and multi_gpu:
        model = nn.DataParallel(model)
    model.to(device)

    all_files = pd.read_csv("/data/BaiDuBigData19-URFC/data/train_oversampling.csv")
    test_files = pd.read_csv("/data/BaiDuBigData19-URFC/data/test.csv")
    train_data_list,val_data_list = train_test_split(all_files, test_size=0.1, random_state = 2050)

    # load dataset
    train_gen = MultiModalDataset(train_data_list,config.train_data,config.train_vis,mode="train")
    train_loader = DataLoader(train_gen,batch_size=config.batch_size,shuffle=True,pin_memory=True,num_workers=16) #num_worker is limited by shared memory in Docker!

    val_gen = MultiModalDataset(val_data_list,config.train_data,config.train_vis,augument=False,mode="train")
    val_loader = DataLoader(val_gen,batch_size=config.batch_size,shuffle=False,pin_memory=True,num_workers=16)

    test_gen = MultiModalDataset(test_files,config.test_data,config.test_vis,augument=False,mode="test")
    test_loader = DataLoader(test_gen,1,shuffle=False,pin_memory=True,num_workers=16)

    #scheduler = lr_scheduler.StepLR(optimizer,step_size=10,gamma=0.1)
    # use mode="max" when monitoring best_acc, mode="min" when monitoring best_loss
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)
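    # e.g. (not from the original) the explicit forms of the two cases noted above:
    # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode="max")  # monitor best_acc
    # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode="min")  # monitor best_loss (default)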
    #n_batches = int(len(train_loader.dataset) // train_loader.batch_size)
    #scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*2)
    start = timer()

    #train
    for epoch in range(0,config.epochs):
        # train
        train_metrics = train(train_loader,model,criterion,optimizer,epoch,val_metrics,best_results,start)
        # val
        val_metrics = evaluate(val_loader,model,criterion,epoch,train_metrics,best_results,start)
        # step ReduceLROnPlateau on the monitored metric (validation loss)
        scheduler.step(val_metrics[1])
        # check results
        is_best_acc = val_metrics[0] > best_results[0]
        best_results[0] = max(val_metrics[0],best_results[0])
        is_best_loss = val_metrics[1] < best_results[1]
        best_results[1] = min(val_metrics[1],best_results[1])
        is_best_f1 = val_metrics[2] > best_results[2]
        best_results[2] = max(val_metrics[2],best_results[2])
        # save model
        save_checkpoint({
                    "epoch":epoch + 1,
                    "model_name":config.model_name,
                    "state_dict":model.state_dict(),
                    "best_acc":best_results[0],
                    "best_loss":best_results[1],
                    "optimizer":optimizer.state_dict(),
                    "fold":fold,
                    "best_f1":best_results[2],
        },is_best_acc,is_best_loss,is_best_f1,fold)
        # print logs
        print('\r',end='',flush=True)
        log.write('%s  %5.1f %6.1f      |   %0.3f   %0.3f   %0.3f     |  %0.3f   %0.3f    %0.3f    |   %s  %s  %s | %s' % (\
                "best", epoch, epoch,
                train_metrics[0], train_metrics[1],train_metrics[2],
                val_metrics[0],val_metrics[1],val_metrics[2],
                str(best_results[0])[:8],str(best_results[1])[:8],str(best_results[2])[:8],
                time_to_str((timer() - start),'min'))
            )
        log.write("\n")
        time.sleep(0.01)

    best_model = torch.load("%s/%s_fold_%s_model_best_loss.pth.tar"%(config.best_models,config.model_name,str(fold)))
    model.load_state_dict(best_model["state_dict"])
    evaluation(test_loader,model,fold)
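Finally, the log object used throughout is never constructed in these snippets (they only make sure ./logs/ exists). A minimal stand-in that matches how log.write() is called; the file name and echo-to-stdout behaviour are assumptions:

class Logger:
    # sketch only -- not the repository's actual logger
    def __init__(self, path="./logs/%s_log.txt" % config.model_name):
        self.file = open(path, "a")

    def write(self, message):
        print(message, end="", flush=True)  # echo to stdout
        self.file.write(message)
        self.file.flush()

log = Logger()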