Example #1
    join("../ganData/facades_fixed", "test"))
train_data_loader = DataLoader(dataset=train_set,
                               num_workers=2,
                               batch_size=BATCH_SIZE,
                               shuffle=True)
test_data_loader = DataLoader(dataset=test_set,
                              num_workers=2,
                              batch_size=BATCH_SIZE,
                              shuffle=True)
test_input, test_target = next(iter(test_data_loader))

real_a = torch.FloatTensor(BATCH_SIZE, IMAGE_CHANNEL, IMAGE_SIZE, IMAGE_SIZE)
real_b = torch.FloatTensor(BATCH_SIZE, OUTPUT_CHANNEL, IMAGE_SIZE, IMAGE_SIZE)

if GPU_NUMS > 1:
    Net_G = Net_G.cuda()
    Net_D = Net_D.cuda()
    lossGAN = lossGAN.cuda()
    lossL1 = lossL1.cuda()
    lossMSE = lossMSE.cuda()

real_a = Variable(real_a.cuda() if GPU_NUMS > 1 else real_a)
real_b = Variable(real_b.cuda() if GPU_NUMS > 1 else real_b)

bar = ProgressBar(EPOCHS, len(train_data_loader), "D loss:%.3f;G loss:%.3f")
for epoch in range(EPOCHS):
    for iteration, batch in enumerate(train_data_loader, 1):
        real_a_cpu, real_b_cpu = batch[0], batch[1]
        real_a.data.resize_(real_a_cpu.size()).copy_(real_a_cpu)
        real_b.data.resize_(real_b_cpu.size()).copy_(real_b_cpu)
        fake_b = Net_G(real_a)
Example #2
def train_model(train_dataset, train_num_each, val_dataset, val_num_each):
    num_train = len(train_dataset)
    num_val = len(val_dataset)

    train_useful_start_idx = get_useful_start_idx(sequence_length,
                                                  train_num_each)

    val_useful_start_idx = get_useful_start_idx(sequence_length, val_num_each)

    num_train_we_use = len(train_useful_start_idx) // num_gpu * num_gpu
    num_val_we_use = len(val_useful_start_idx) // num_gpu * num_gpu
    # num_train_we_use = 800
    # num_val_we_use = 800

    train_we_use_start_idx = train_useful_start_idx[0:num_train_we_use]
    val_we_use_start_idx = val_useful_start_idx[0:num_val_we_use]

    train_idx = []
    for i in range(num_train_we_use):
        for j in range(sequence_length):
            train_idx.append(train_we_use_start_idx[i] + j)

    val_idx = []
    for i in range(num_val_we_use):
        for j in range(sequence_length):
            val_idx.append(val_we_use_start_idx[i] + j)

    num_train_all = len(train_idx)
    num_val_all = len(val_idx)

    print('num train start idx : {:6d}'.format(len(train_useful_start_idx)))
    print('last idx train start: {:6d}'.format(train_useful_start_idx[-1]))
    print('num of train dataset: {:6d}'.format(num_train))
    print('num of train we use : {:6d}'.format(num_train_we_use))
    print('num of all train use: {:6d}'.format(num_train_all))
    print('num valid start idx : {:6d}'.format(len(val_useful_start_idx)))
    print('last idx valid start: {:6d}'.format(val_useful_start_idx[-1]))
    print('num of valid dataset: {:6d}'.format(num_val))
    print('num of valid we use : {:6d}'.format(num_val_we_use))
    print('num of all valid use: {:6d}'.format(num_val_all))

    train_loader = DataLoader(train_dataset,
                              batch_size=train_batch_size,
                              sampler=train_idx,
                              num_workers=workers,
                              pin_memory=False)
    val_loader = DataLoader(val_dataset,
                            batch_size=val_batch_size,
                            sampler=val_idx,
                            num_workers=workers,
                            pin_memory=False)

    model = multi_lstm()
    model = DataParallel(model)
    model.load_state_dict(
        torch.load(
            'cnn_lstm_epoch_25_length_4_opt_1_mulopt_1_flip_0_crop_1_batch_200_train1_9998_train2_9987_val1_9731_val2_8752.pth'
        ))

    kl_fc_t2p = nn.Linear(7, 7)

    all_tool_to_phase = np.load('kl_fc_t2p.npy')

    kl_fc_t2p.weight.data = torch.from_numpy(
        all_tool_to_phase.astype('float32'))

    for param in kl_fc_t2p.parameters():
        param.requires_grad = True

    if use_gpu:
        model = model.cuda()
        kl_fc_t2p = kl_fc_t2p.cuda()

    criterion_1 = nn.BCEWithLogitsLoss(size_average=False)
    criterion_2 = nn.CrossEntropyLoss(size_average=False)
    criterion_3 = nn.KLDivLoss(size_average=False)
    softmax_cuda = nn.Softmax().cuda()
    sigmoid_cuda = nn.Sigmoid().cuda()

    if multi_optim == 0:
        if optimizer_choice == 0:
            optimizer = optim.SGD(list(model.parameters()) +
                                  list(kl_fc_t2p.parameters()),
                                  lr=learning_rate,
                                  momentum=momentum,
                                  dampening=dampening,
                                  weight_decay=weight_decay,
                                  nesterov=use_nesterov)
            if sgd_adjust_lr == 0:
                exp_lr_scheduler = lr_scheduler.StepLR(optimizer,
                                                       step_size=sgd_step,
                                                       gamma=sgd_gamma)
            elif sgd_adjust_lr == 1:
                exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(
                    optimizer, 'min')
        elif optimizer_choice == 1:
            optimizer = optim.Adam(
                list(model.parameters()) + list(kl_fc_t2p.parameters()),
                lr=learning_rate)
    elif multi_optim == 1:
        if optimizer_choice == 0:
            optimizer = optim.SGD([
                {
                    'params': model.module.share.parameters()
                },
                {
                    'params': kl_fc_t2p.parameters()
                },
                {
                    'params': model.module.lstm.parameters(),
                    'lr': learning_rate
                },
                {
                    'params': model.module.fc.parameters(),
                    'lr': learning_rate
                },
            ],
                                  lr=learning_rate / 10,
                                  momentum=momentum,
                                  dampening=dampening,
                                  weight_decay=weight_decay,
                                  nesterov=use_nesterov)
            if sgd_adjust_lr == 0:
                exp_lr_scheduler = lr_scheduler.StepLR(optimizer,
                                                       step_size=sgd_step,
                                                       gamma=sgd_gamma)
            elif sgd_adjust_lr == 1:
                exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(
                    optimizer, 'min')
        elif optimizer_choice == 1:
            optimizer = optim.Adam([
                {
                    'params': model.module.share.parameters()
                },
                {
                    'params': kl_fc_t2p.parameters()
                },
                {
                    'params': model.module.lstm.parameters(),
                    'lr': learning_rate
                },
                {
                    'params': model.module.fc.parameters(),
                    'lr': learning_rate
                },
            ],
                                   lr=learning_rate / 10)

    best_model_wts = copy.deepcopy(model.state_dict())
    best_val_accuracy_1 = 0.0
    best_val_accuracy_2 = 0.0  # judge by accu2
    correspond_train_acc_1 = 0.0
    correspond_train_acc_2 = 0.0

    # Record 2 train accuracies, 2 valid accuracies, 3 train losses, and 3 valid losses - 10 values per epoch
    record_np = np.zeros([epochs, 10])

    for epoch in range(epochs):
        # np.random.seed(epoch)
        np.random.shuffle(train_we_use_start_idx)
        train_idx = []
        for i in range(num_train_we_use):
            for j in range(sequence_length):
                train_idx.append(train_we_use_start_idx[i] + j)

        train_loader = DataLoader(train_dataset,
                                  batch_size=train_batch_size,
                                  sampler=train_idx,
                                  num_workers=workers,
                                  pin_memory=False)

        model.train()
        train_loss_1 = 0.0
        train_loss_2 = 0.0
        train_loss_3 = 0.0
        train_corrects_1 = 0
        train_corrects_2 = 0

        train_start_time = time.time()
        for data in train_loader:
            inputs, labels_1, labels_2 = data
            if use_gpu:
                inputs = Variable(inputs.cuda())
                labels_1 = Variable(labels_1.cuda())
                labels_2 = Variable(labels_2.cuda())
            else:
                inputs = Variable(inputs)
                labels_1 = Variable(labels_1)
                labels_2 = Variable(labels_2)

            optimizer.zero_grad()

            outputs_1, outputs_2 = model.forward(inputs)

            sig_output_1 = sigmoid_cuda(outputs_1)
            soft_output_2 = softmax_cuda(outputs_2)
            sig_output_1 = Variable(sig_output_1.data, requires_grad=False)
            soft_output_2 = Variable(soft_output_2.data, requires_grad=False)
            kl_output_1 = kl_fc_t2p(sig_output_1)

            preds_1 = torch.cuda.ByteTensor(sig_output_1.data > 0.5)
            preds_1 = preds_1.long()
            train_corrects_1 += torch.sum(preds_1 == labels_1.data)
            labels_1 = Variable(labels_1.data.float())
            loss_1 = criterion_1(outputs_1, labels_1)
            loss_2 = criterion_2(outputs_2, labels_2)

            _, preds_2 = torch.max(outputs_2.data, 1)
            train_corrects_2 += torch.sum(preds_2 == labels_2.data)

            loss_3 = torch.abs(criterion_3(kl_output_1, soft_output_2))
            loss = loss_1 + loss_2 + loss_3
            loss.backward()
            optimizer.step()

            train_loss_1 += loss_1.data[0]
            train_loss_2 += loss_2.data[0]
            train_loss_3 += loss_3.data[0]

        train_elapsed_time = time.time() - train_start_time
        train_accuracy_1 = train_corrects_1 / num_train_all / 7
        train_accuracy_2 = train_corrects_2 / num_train_all
        train_average_loss_1 = train_loss_1 / num_train_all / 7
        train_average_loss_2 = train_loss_2 / num_train_all
        train_average_loss_3 = train_loss_3 / num_train_all

        # begin eval

        model.eval()
        val_loss_1 = 0.0
        val_loss_2 = 0.0
        val_loss_3 = 0.0
        val_corrects_1 = 0
        val_corrects_2 = 0

        val_start_time = time.time()
        for data in val_loader:
            inputs, labels_1, labels_2 = data
            labels_2 = labels_2[(sequence_length - 1)::sequence_length]
            if use_gpu:
                inputs = Variable(inputs.cuda(), volatile=True)
                labels_1 = Variable(labels_1.cuda(), volatile=True)
                labels_2 = Variable(labels_2.cuda(), volatile=True)
            else:
                inputs = Variable(inputs, volatile=True)
                labels_1 = Variable(labels_1, volatile=True)
                labels_2 = Variable(labels_2, volatile=True)

            if crop_type == 0 or crop_type == 1:
                outputs_1, outputs_2 = model.forward(inputs)
            elif crop_type == 5:
                inputs = inputs.permute(1, 0, 2, 3, 4).contiguous()
                inputs = inputs.view(-1, 3, 224, 224)
                outputs_1, outputs_2 = model.forward(inputs)
                outputs_1 = outputs_1.view(5, -1, 7)
                outputs_1 = torch.mean(outputs_1, 0)
                outputs_2 = outputs_2.view(5, -1, 7)
                outputs_2 = torch.mean(outputs_2, 0)
            elif crop_type == 10:
                inputs = inputs.permute(1, 0, 2, 3, 4).contiguous()
                inputs = inputs.view(-1, 3, 224, 224)
                outputs_1, outputs_2 = model.forward(inputs)
                outputs_1 = outputs_1.view(10, -1, 7)
                outputs_1 = torch.mean(outputs_1, 0)
                outputs_2 = outputs_2.view(10, -1, 7)
                outputs_2 = torch.mean(outputs_2, 0)

            sig_output_1 = sigmoid_cuda(outputs_1)
            soft_output_2 = softmax_cuda(outputs_2)
            sig_output_1 = Variable(sig_output_1.data, requires_grad=False)
            soft_output_2 = Variable(soft_output_2.data, requires_grad=False)

            kl_output_1 = (kl_fc_t2p(sig_output_1))

            outputs_2 = outputs_2[sequence_length - 1::sequence_length]
            _, preds_2 = torch.max(outputs_2.data, 1)

            preds_1 = torch.cuda.ByteTensor(sig_output_1.data > 0.5)
            preds_1 = preds_1.long()
            val_corrects_1 += torch.sum(preds_1 == labels_1.data)
            labels_1 = Variable(labels_1.data.float())
            loss_1 = criterion_1(outputs_1, labels_1)
            loss_2 = criterion_2(outputs_2, labels_2)
            val_corrects_2 += torch.sum(preds_2 == labels_2.data)

            loss_3 = torch.abs(criterion_3(kl_output_1, soft_output_2))

            val_loss_1 += loss_1.data[0]
            val_loss_2 += loss_2.data[0]
            val_loss_3 += loss_3.data[0]

        val_elapsed_time = time.time() - val_start_time
        val_accuracy_1 = val_corrects_1 / (num_val_all * 7)
        val_accuracy_2 = val_corrects_2 / num_val_we_use
        val_average_loss_1 = val_loss_1 / (num_val_all * 7)
        val_average_loss_2 = val_loss_2 / num_val_we_use
        val_average_loss_3 = val_loss_3 / num_val_all

        print('epoch: {:3d}'
              ' train time: {:2.0f}m{:2.0f}s'
              ' train accu_1: {:.4f}'
              ' train accu_2: {:.4f}'
              ' train loss_1: {:4.4f}'
              ' train loss_2: {:4.4f}'
              ' train loss_3: {:4.4f}'.format(
                  epoch, train_elapsed_time // 60, train_elapsed_time % 60,
                  train_accuracy_1, train_accuracy_2, train_average_loss_1,
                  train_average_loss_2, train_average_loss_3))
        print('epoch: {:3d}'
              ' valid time: {:2.0f}m{:2.0f}s'
              ' valid accu_1: {:.4f}'
              ' valid accu_2: {:.4f}'
              ' valid loss_1: {:4.4f}'
              ' valid loss_2: {:4.4f}'
              ' valid loss_3: {:4.4f}'.format(epoch, val_elapsed_time // 60,
                                              val_elapsed_time % 60,
                                              val_accuracy_1, val_accuracy_2,
                                              val_average_loss_1,
                                              val_average_loss_2,
                                              val_average_loss_3))

        if optimizer_choice == 0:
            if sgd_adjust_lr == 0:
                exp_lr_scheduler.step()
            elif sgd_adjust_lr == 1:
                exp_lr_scheduler.step(val_average_loss_1 + val_average_loss_2)

        if val_accuracy_2 > best_val_accuracy_2 and val_accuracy_1 > 0.95:
            best_val_accuracy_2 = val_accuracy_2
            best_val_accuracy_1 = val_accuracy_1
            correspond_train_acc_1 = train_accuracy_1
            correspond_train_acc_2 = train_accuracy_2
            best_model_wts = copy.deepcopy(model.state_dict())
        elif val_accuracy_2 == best_val_accuracy_2 and val_accuracy_1 > 0.95:
            if val_accuracy_1 > best_val_accuracy_1:
                correspond_train_acc_1 = train_accuracy_1
                correspond_train_acc_2 = train_accuracy_2
                best_model_wts = copy.deepcopy(model.state_dict())
            elif val_accuracy_1 == best_val_accuracy_1:
                if train_accuracy_2 > correspond_train_acc_2:
                    correspond_train_acc_2 = train_accuracy_2
                    correspond_train_acc_1 = train_accuracy_1
                    best_model_wts = copy.deepcopy(model.state_dict())
                elif train_accuracy_2 == correspond_train_acc_2:
                    if train_accuracy_1 > correspond_train_acc_1:
                        correspond_train_acc_1 = train_accuracy_1
                        best_model_wts = copy.deepcopy(model.state_dict())

        record_np[epoch, 0] = train_accuracy_1
        record_np[epoch, 1] = train_accuracy_2
        record_np[epoch, 2] = train_average_loss_1
        record_np[epoch, 3] = train_average_loss_2
        record_np[epoch, 4] = train_average_loss_3

        record_np[epoch, 5] = val_accuracy_1
        record_np[epoch, 6] = val_accuracy_2
        record_np[epoch, 7] = val_average_loss_1
        record_np[epoch, 8] = val_average_loss_2
        record_np[epoch, 9] = val_average_loss_3

    print('best accuracy_1: {:.4f} cor train accu_1: {:.4f}'.format(
        best_val_accuracy_1, correspond_train_acc_1))
    print('best accuracy_2: {:.4f} cor train accu_2: {:.4f}'.format(
        best_val_accuracy_2, correspond_train_acc_2))

    save_val_1 = int("{:4.0f}".format(best_val_accuracy_1 * 10000))
    save_val_2 = int("{:4.0f}".format(best_val_accuracy_2 * 10000))
    save_train_1 = int("{:4.0f}".format(correspond_train_acc_1 * 10000))
    save_train_2 = int("{:4.0f}".format(correspond_train_acc_2 * 10000))
    public_name = "cnn_lstm_klt2p" \
                  + "_epoch_" + str(epochs) \
                  + "_length_" + str(sequence_length) \
                  + "_opt_" + str(optimizer_choice) \
                  + "_mulopt_" + str(multi_optim) \
                  + "_flip_" + str(use_flip) \
                  + "_crop_" + str(crop_type) \
                  + "_batch_" + str(train_batch_size) \
                  + "_train1_" + str(save_train_1) \
                  + "_train2_" + str(save_train_2) \
                  + "_val1_" + str(save_val_1) \
                  + "_val2_" + str(save_val_2)
    model_name = public_name + ".pth"
    torch.save(best_model_wts, model_name)

    record_name = public_name + ".npy"
    np.save(record_name, record_np)

    kl_fc_t2p_name = public_name + "t2p.npy"
    kl_fc_t2p_np = kl_fc_t2p.cpu().weight.data.numpy()
    np.save(kl_fc_t2p_name, kl_fc_t2p_np)
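Note on the KL term in this example: nn.KLDivLoss expects its first argument to be log-probabilities, while kl_output_1 above is the raw output of a linear layer. A minimal sketch of the conventional call, using hypothetical tensors rather than the repository's variables:

import torch
import torch.nn as nn
import torch.nn.functional as F

kl_div = nn.KLDivLoss(reduction='sum')        # reduction='sum' corresponds to size_average=False
mapped_scores = torch.randn(4, 7)             # stand-in for the 7x7 mapping layer's output
phase_probs = torch.softmax(torch.randn(4, 7), dim=1)  # stand-in target distribution
loss = kl_div(F.log_softmax(mapped_scores, dim=1), phase_probs)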
Example #3
def test_model(test_dataset, test_num_each):
    num_test = len(test_dataset)
    test_idx = [i for i in range(num_test)]
    print('num of test dataset: {:6d}'.format(num_test))
    test_loader = DataLoader(
        test_dataset,
        batch_size=test_batch_size,
        sampler=test_idx,
        num_workers=workers,
        pin_memory=False
    )
    model = pure_resnet()
    model = DataParallel(model)
    model.load_state_dict(torch.load(model_name))
    if use_gpu:
        model = model.cuda()
    criterion = nn.CrossEntropyLoss(size_average=False)

    model.eval()
    test_loss = 0.0
    test_corrects = 0
    all_preds = []
    test_start_time = time.time()
    for data in test_loader:
        inputs, labels_1, labels_2 = data
        if use_gpu:
            inputs = Variable(inputs.cuda(), volatile=True)
            labels = Variable(labels_2.cuda(), volatile=True)
        else:
            inputs = Variable(inputs, volatile=True)
            labels = Variable(labels_2, volatile=True)

        if crop_type == 0 or crop_type == 1:
            outputs = model.forward(inputs)
        elif crop_type == 5:
            inputs = inputs.permute(1, 0, 2, 3, 4).contiguous()
            inputs = inputs.view(-1, 3, 224, 224)
            outputs = model.forward(inputs)
            outputs = outputs.view(5, -1, 7)
            outputs = torch.mean(outputs, 0)
        elif crop_type == 10:
            inputs = inputs.permute(1, 0, 2, 3, 4).contiguous()
            inputs = inputs.view(-1, 3, 224, 224)
            outputs = model.forward(inputs)
            outputs = outputs.view(10, -1, 7)
            outputs = torch.mean(outputs, 0)
        _, preds = torch.max(outputs.data, 1)
        for i in range(len(preds)):
            all_preds.append(preds[i])
        loss = criterion(outputs, labels)
        test_loss += loss.data[0]
        test_corrects += torch.sum(preds == labels.data)
        # print(test_corrects)
    test_elapsed_time = time.time() - test_start_time
    test_accuracy = test_corrects / num_test
    test_average_loss = test_loss / num_test


    save_test = int("{:4.0f}".format(test_accuracy * 10000))
    pred_name = model_pure_name + '_test_' + str(save_test) + '_crop_' + str(crop_type) + '.pkl'

    with open(pred_name, 'wb') as f:
        pickle.dump(all_preds, f)
    print('test elapsed: {:2.0f}m{:2.0f}s'
          ' test loss: {:4.4f}'
          ' test accu: {:.4f}'
          .format(test_elapsed_time // 60,
                  test_elapsed_time % 60,
                  test_average_loss, test_accuracy))
Example #4
def main():
    args = parser.parse_args()

    log_out_dir = os.path.join(RESULT_DIR, 'logs', args.out_dir,
                               'fold%d' % args.fold)
    if not os.path.exists(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(os.path.join(log_out_dir, 'log.train.txt'), mode='a')

    model_out_dir = os.path.join(RESULT_DIR, 'models', args.out_dir,
                                 'fold%d' % args.fold)
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(
        model_out_dir))
    if not os.path.exists(model_out_dir):
        os.makedirs(model_out_dir)

    # set cuda visible device
    if not args.all_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
    cudnn.benchmark = True
    # cudnn.enabled = False

    # set random seeds
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    np.random.seed(0)

    model_params = {}
    model_params['architecture'] = args.arch
    model_params['num_classes'] = args.num_classes
    model_params['in_channels'] = args.in_channels
    if 'efficientnet' in args.arch:
        model_params['image_size'] = args.img_size
        model_params['encoder'] = args.effnet_encoder

    model = init_network(model_params)

    if args.load_state_dict_path is not None:
        if args.load_state_dict_path == 'use-img-level-densenet-ckpt':
            model_dir = '../output/models/densenet121_1024_all_data__obvious_neg__gradaccum_20__start_lr_3e6'
            pretrained_ckpt_path = os.path.join(f'{model_dir}',
                                                f'fold{args.fold}',
                                                'final.pth')
        else:
            pretrained_ckpt_path = args.load_state_dict_path
        init_pretrained = torch.load(pretrained_ckpt_path)
        model.load_state_dict(init_pretrained['state_dict'])

    if args.all_gpus:
        model = DataParallel(model)
    model.cuda()

    # define loss function (criterion)
    try:
        criterion = eval(args.loss)().cuda()
    except:
        raise (RuntimeError("Loss {} not available!".format(args.loss)))

    start_epoch = 0
    best_loss = 1e5
    best_epoch = 0
    best_focal = float('inf')

    # define scheduler
    try:
        scheduler = eval(args.scheduler)(
            scheduler_lr_multiplier=args.scheduler_lr_multiplier,
            scheduler_epoch_offset=args.scheduler_epoch_offset)
    except:
        raise (RuntimeError("Scheduler {} not available!".format(
            args.scheduler)))
    optimizer = scheduler.schedule(model, start_epoch, args.epochs)[0]

    # optionally resume from a checkpoint
    if args.resume:
        args.resume = os.path.join(model_out_dir, args.resume)
        if os.path.isfile(args.resume):
            # load checkpoint weights and update model and optimizer
            log.write(">> Loading checkpoint:\n>> '{}'\n".format(args.resume))

            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            best_epoch = checkpoint['best_epoch']
            best_focal = checkpoint['best_map']
            model.load_state_dict(checkpoint['state_dict'])

            optimizer_fpath = args.resume.replace('.pth', '_optim.pth')
            if os.path.exists(optimizer_fpath):
                log.write(">> Loading checkpoint:\n>> '{}'\n".format(
                    optimizer_fpath))
                optimizer.load_state_dict(
                    torch.load(optimizer_fpath)['optimizer'])
            log.write(">>>> loaded checkpoint:\n>>>> '{}' (epoch {})\n".format(
                args.resume, checkpoint['epoch']))
        else:
            log.write(">> No checkpoint found at '{}'\n".format(args.resume))

    # Data loading code
    train_transform = train_multi_augment2

    with open('../input/imagelevel_folds_obvious_staining_5.pkl', 'rb') as f:
        folds = pickle.load(f)
    fold = args.fold
    trn_img_paths, val_img_paths = folds[fold]

    train_df = get_train_df_ohe(clean_from_duplicates=True)
    basepath_2_ohe_vector = {
        img: vec
        for img, vec in zip(train_df['img_base_path'],
                            train_df.iloc[:, 2:].values)
    }

    public_hpa_df_17 = get_public_df_ohe(clean_from_duplicates=True)
    public_basepath_2_ohe_vector = {
        img_path: vec
        for img_path, vec in zip(public_hpa_df_17['img_base_path'],
                                 public_hpa_df_17.iloc[:, 2:].values)
    }
    basepath_2_ohe_vector.update(public_basepath_2_ohe_vector)

    available_paths = set(
        np.concatenate((train_df['img_base_path'].values,
                        public_hpa_df_17['img_base_path'].values)))
    trn_img_paths = [path for path in trn_img_paths if path in available_paths]
    val_img_paths = [path for path in val_img_paths if path in available_paths]
    labels_df = pd.read_hdf(args.cell_level_labels_path)

    # modifying minor class labels
    cherrypicked_mitotic_spindle = pd.read_csv(
        '../input/mitotic_cells_selection.csv')

    cherrypicked_mitotic_spindle_img_cell = set(
        cherrypicked_mitotic_spindle[['ID', 'cell_i']].apply(tuple,
                                                             axis=1).values)

    cherrypicked_mitotic_spindle_img_cell = {
        (img, cell_i - 1)
        for img, cell_i in cherrypicked_mitotic_spindle_img_cell
    }

    class_names = get_class_names()
    mitotic_spindle_class_i = class_names.index('Mitotic spindle')

    if args.include_nn_mitotic:
        cherrypicked_mitotic_spindle_based_on_nn = pd.read_csv(
            '../input/mitotic_pos_nn_added.csv')
        cherrypicked_mitotic_spindle_img_cell.update(
            set(cherrypicked_mitotic_spindle_based_on_nn[[
                'ID', 'cell_i'
            ]].apply(tuple, axis=1).values))
        print('len cherrypicked_mitotic_spindle_img_cell',
              len(cherrypicked_mitotic_spindle_img_cell))
    mitotic_bool_idx = labels_df.index.isin(
        cherrypicked_mitotic_spindle_img_cell)

    def modify_label(labels, idx, val):
        labels[idx] = val
        return labels

    labels_df.loc[mitotic_bool_idx, 'image_level_pred'] = labels_df.loc[
        mitotic_bool_idx, 'image_level_pred'].map(
            lambda x: modify_label(x, mitotic_spindle_class_i, 1))

    if args.include_nn_mitotic:
        cherrypicked_not_mitotic_spindle_based_on_nn = pd.read_csv(
            '../input/mitotic_neg_nn_added.csv')
        cherrypicked_not_mitotic_spindle_based_on_nn = set(
            cherrypicked_not_mitotic_spindle_based_on_nn[[
                'ID', 'cell_i'
            ]].apply(tuple, axis=1).values)
        not_mitotic_bool_idx = labels_df.index.isin(
            cherrypicked_not_mitotic_spindle_based_on_nn)
        labels_df.loc[not_mitotic_bool_idx,
                      'image_level_pred'] = labels_df.loc[
                          not_mitotic_bool_idx,
                          'image_level_pred'].map(lambda x: modify_label(
                              x, mitotic_spindle_class_i, 0))

    if args.ignore_negative:
        raise NotImplementedError

    if args.upsample_minorities:
        cells_to_upsample = list(cherrypicked_mitotic_spindle_img_cell)
        aggresome_class_i = class_names.index('Aggresome')
        confident_aggresome_indices = list(
            labels_df.index[labels_df['image_level_pred'].map(
                lambda x: x[aggresome_class_i] > 0.9)])
        print('confident_aggresome_indices len',
              len(confident_aggresome_indices))
        print('confident_aggresome_indices[:5]',
              confident_aggresome_indices[:5])
        cells_to_upsample += confident_aggresome_indices
    else:
        cells_to_upsample = None
    train_dataset = ProteinDatasetCellSeparateLoading(
        trn_img_paths,
        labels_df=labels_df,
        cells_to_upsample=cells_to_upsample,
        img_size=args.img_size,
        in_channels=args.in_channels,
        transform=train_transform,
        basepath_2_ohe=basepath_2_ohe_vector,
        normalize=args.normalize,
        target_raw_img_size=args.target_raw_img_size)
    train_loader = DataLoader(
        train_dataset,
        sampler=RandomSampler(train_dataset),
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    # valid_dataset = ProteinDatasetCellLevel(val_img_paths,
    #                                         labels_df=labels_df,
    #                                         img_size=args.img_size,
    #                                         batch_size=64,
    #                                         is_trainset=True,
    #                                         in_channels=args.in_channels)

    valid_dataset = ProteinDatasetCellSeparateLoading(
        val_img_paths,
        labels_df=labels_df,
        img_size=args.img_size,
        in_channels=args.in_channels,
        basepath_2_ohe=basepath_2_ohe_vector,
        normalize=args.normalize,
        target_raw_img_size=args.target_raw_img_size)
    valid_loader = DataLoader(valid_dataset,
                              sampler=SequentialSampler(valid_dataset),
                              batch_size=args.batch_size,
                              drop_last=False,
                              num_workers=args.workers,
                              pin_memory=True)

    log.write('** start training here! **\n')
    log.write('\n')
    log.write(
        'epoch    iter      rate     |  train_loss/acc  |    valid_loss/acc/map/focal     |best_epoch/best_focal|  min \n'
    )
    log.write(
        '-----------------------------------------------------------------------------------------------------------------\n'
    )
    start_epoch += 1

    if args.eval_at_start:
        with torch.no_grad():
            valid_loss, valid_acc, val_focal, val_map_score = validate(
                valid_loader, model, criterion, -1, log)
        print('\r', end='', flush=True)
        log.write(
            '%5.1f   %5d    %0.6f   |  %0.4f  %0.4f  |    %0.4f  %6.4f %6.4f %6.1f  |    %6.4f  %6.4f   | %3.1f min \n' % \
            (-1, -1, -1, -1, -1, valid_loss, valid_acc, val_map_score, val_focal,
                   best_epoch, best_focal, -1))

    for epoch in range(start_epoch, args.epochs + 1):
        end = time.time()

        # set manual seeds per epoch
        np.random.seed(epoch)
        torch.manual_seed(epoch)
        torch.cuda.manual_seed_all(epoch)

        # adjust learning rate for each epoch
        lr_list = scheduler.step(model, epoch, args.epochs)
        lr = lr_list[0]

        # train for one epoch on train set
        iter, train_loss, train_acc = train(
            train_loader,
            model,
            criterion,
            optimizer,
            epoch,
            clipnorm=args.clipnorm,
            lr=lr,
            agg_steps=args.gradient_accumulation_steps)

        with torch.no_grad():
            valid_loss, valid_acc, val_focal, val_map_score = validate(
                valid_loader, model, criterion, epoch, log)

        # remember best loss and save checkpoint
        is_best = val_focal < best_focal
        best_loss = min(valid_loss, best_loss)
        best_epoch = epoch if is_best else best_epoch
        best_focal = val_focal if is_best else best_focal

        print('\r', end='', flush=True)
        log.write('%5.1f   %5d    %0.6f   |  %0.4f  %0.4f  |    %0.4f  %6.4f %6.4f  %6.1f |  %6.4f  %6.4f | %3.1f min \n' % \
                  (epoch, iter + 1, lr, train_loss, train_acc, valid_loss, valid_acc, val_map_score, val_focal,
                   best_epoch, best_focal, (time.time() - end) / 60))

        save_model(model,
                   is_best,
                   model_out_dir,
                   optimizer=optimizer,
                   epoch=epoch,
                   best_epoch=best_epoch,
                   best_map=best_focal)
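Example #4 resolves the loss class with eval() on a command-line string. A safer equivalent, sketched here with an explicit registry (hypothetical helper, not the repository's code):

import torch.nn as nn

# Hypothetical name-to-class registry replacing eval(args.loss); extend it with
# whatever custom losses the project defines. The original additionally moves
# the criterion to the GPU with .cuda().
LOSSES = {
    'BCEWithLogitsLoss': nn.BCEWithLogitsLoss,
    'CrossEntropyLoss': nn.CrossEntropyLoss,
}

def build_criterion(name):
    if name not in LOSSES:
        raise RuntimeError("Loss {} not available!".format(name))
    return LOSSES[name]()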
Example #5
File: LWNet_nt.py  Project: sycomix/AINet
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(data_loader(test_data_txt),
                                          batch_size=args.test_batch_size)

make_if_not_exist(trained_model_dir)

if args.dataset == 'PaviaU':
    num_cla = 9
elif args.dataset == 'Indian':
    num_cla = 16
else:
    num_cla = 13
model = DataParallel(dict[args.model_name](num_classes=num_cla,
                                           dropout_keep_prob=0))
if args.use_cuda:
    model.cuda()

optimizer = optim.SGD(model.parameters(),
                      lr=args.lr,
                      momentum=args.momentum,
                      weight_decay=1e-5)

start_epoch = 0
if args.restore and len(os.listdir(trained_model_dir)):
    model, start_epoch = model_restore(model, trained_model_dir)

train_info_record = trained_model_dir + 'train_info_' + args.model_name + '.txt'

for epoch in range(start_epoch + 1, args.epochs + 1):
    start = time.time()
    train(epoch, model, train_loader, optimizer, args)
Example #6
torch.set_num_threads(8)
cudnn.benchmark = True
# cudnn.deterministic = False
cudnn.enabled = True

coco_val = Coco()

val_loader = DataLoader(coco_val,
                        batch_size=test_batch_size,
                        shuffle=False,
                        num_workers=8,
                        pin_memory=False)

pose_net = bninception(out_chn=2)
model = DataParallel(pose_net)
model.cuda()
checkpoint = torch.load('models/m_129.pth')
pretrained_dict = checkpoint['state_dict']
model.load_state_dict(pretrained_dict)
model.eval()
# total_loss = 0
start_time = time.time()
det_loss = 0
scale_loss = 0
rec = []
t = 0
for _, (img, imgf, meta) in enumerate(tqdm(val_loader)):
    with torch.no_grad():

        inputs = img.cuda(non_blocking=True)
        output = model(inputs)
Example #7
class AdvTrainer(BaseTrainer):
    def __init__(self, args):
        super(AdvTrainer, self).__init__(args)

    def make_model_env(self, gpu, ngpus_per_node):
        if self.args.distributed:
            self.args.gpu = self.args.devices[gpu]
        else:
            self.args.gpu = 0

        if self.args.use_cuda and self.args.distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            self.args.rank = self.args.rank * ngpus_per_node + gpu
            dist.init_process_group(backend=self.args.dist_backend,
                                    init_method=self.args.dist_url,
                                    world_size=self.args.world_size,
                                    rank=self.args.rank)

        self.model = DomainQA(self.args.bert_model, self.args.num_classes,
                              self.args.hidden_size, self.args.num_layers,
                              self.args.dropout, self.args.dis_lambda,
                              self.args.concat, self.args.anneal)

        if self.args.load_model is not None:
            print("Loading model from ", self.args.load_model)
            self.model.load_state_dict(
                torch.load(self.args.load_model,
                           map_location=lambda storage, loc: storage))

        if self.args.freeze_bert:
            for param in self.model.bert.parameters():
                param.requires_grad = False

        max_len = max([len(f) for f in self.features_lst])
        num_train_optimization_steps = math.ceil(
            max_len / self.args.batch_size) * self.args.epochs * len(
                self.features_lst)

        qa_params = list(self.model.bert.named_parameters()) + list(
            self.model.qa_outputs.named_parameters())
        dis_params = list(self.model.discriminator.named_parameters())
        self.qa_optimizer = get_opt(qa_params, num_train_optimization_steps,
                                    self.args)
        self.dis_optimizer = get_opt(dis_params, num_train_optimization_steps,
                                     self.args)

        if self.args.use_cuda:
            if self.args.distributed:
                torch.cuda.set_device(self.args.gpu)
                self.model.cuda(self.args.gpu)
                self.args.batch_size = int(self.args.batch_size /
                                           ngpus_per_node)
                self.args.workers = int(
                    (self.args.workers + ngpus_per_node - 1) / ngpus_per_node)
                self.model = DistributedDataParallel(
                    self.model,
                    device_ids=[self.args.gpu],
                    find_unused_parameters=True)
            else:
                self.model.cuda()
                self.model = DataParallel(self.model,
                                          device_ids=self.args.devices)

        cudnn.benchmark = True

    def train(self):
        step = 1
        avg_qa_loss = 0
        avg_dis_loss = 0
        iter_lst = [self.get_iter(self.features_lst, self.args)]
        num_batches = sum([len(iterator[0]) for iterator in iter_lst])
        for epoch in range(self.args.start_epoch,
                           self.args.start_epoch + self.args.epochs):
            start = time.time()
            self.model.train()
            batch_step = 1
            for data_loader, sampler in iter_lst:
                if self.args.distributed:
                    sampler.set_epoch(epoch)

                for i, batch in enumerate(data_loader, start=1):
                    input_ids, input_mask, seg_ids, start_positions, end_positions, labels = batch

                    # remove unnecessary pad token
                    seq_len = torch.sum(torch.sign(input_ids), 1)
                    max_len = torch.max(seq_len)

                    input_ids = input_ids[:, :max_len].clone()
                    input_mask = input_mask[:, :max_len].clone()
                    seg_ids = seg_ids[:, :max_len].clone()
                    start_positions = start_positions.clone()
                    end_positions = end_positions.clone()

                    if self.args.use_cuda:
                        input_ids = input_ids.cuda(self.args.gpu,
                                                   non_blocking=True)
                        input_mask = input_mask.cuda(self.args.gpu,
                                                     non_blocking=True)
                        seg_ids = seg_ids.cuda(self.args.gpu,
                                               non_blocking=True)
                        start_positions = start_positions.cuda(
                            self.args.gpu, non_blocking=True)
                        end_positions = end_positions.cuda(self.args.gpu,
                                                           non_blocking=True)

                    qa_loss = self.model(input_ids,
                                         seg_ids,
                                         input_mask,
                                         start_positions,
                                         end_positions,
                                         labels,
                                         dtype="qa",
                                         global_step=step)
                    qa_loss = qa_loss.mean()
                    qa_loss.backward()

                    # update qa model
                    avg_qa_loss = self.cal_running_avg_loss(
                        qa_loss.item(), avg_qa_loss)
                    self.qa_optimizer.step()
                    self.qa_optimizer.zero_grad()

                    # update discriminator
                    dis_loss = self.model(input_ids,
                                          seg_ids,
                                          input_mask,
                                          start_positions,
                                          end_positions,
                                          labels,
                                          dtype="dis",
                                          global_step=step)
                    dis_loss = dis_loss.mean()
                    dis_loss.backward()
                    avg_dis_loss = self.cal_running_avg_loss(
                        dis_loss.item(), avg_dis_loss)
                    self.dis_optimizer.step()
                    self.dis_optimizer.zero_grad()
                    step += 1
                    if epoch != 0 and i % 2000 == 0:
                        result_dict = self.evaluate_model(i)
                        for dev_file, f1 in result_dict.items():
                            print("GPU/CPU {} evaluated {}: {:.2f}".format(
                                self.args.gpu, dev_file, f1),
                                  end="\n")

                    batch_step += 1
                    msg = "{}/{} {} - ETA : {} - QA loss: {:.4f}, DIS loss: {:.4f}" \
                        .format(batch_step, num_batches, progress_bar(batch_step, num_batches),
                                eta(start, batch_step, num_batches),
                                avg_qa_loss, avg_dis_loss)
                    print(msg, end="\r")

            print(
                "[GPU Num: {}, Epoch: {}, Final QA loss: {:.4f}, Final DIS loss: {:.4f}]"
                .format(self.args.gpu, epoch, avg_qa_loss, avg_dis_loss))

            # save model
            if not self.args.distributed or self.args.rank == 0:
                self.save_model(epoch, avg_qa_loss)

            if self.args.do_valid:
                result_dict = self.evaluate_model(epoch)
                for dev_file, f1 in result_dict.items():
                    print("GPU/CPU {} evaluated {}: {:.2f}".format(
                        self.args.gpu, dev_file, f1),
                          end="\n")
Example #8
def train_model(train_dataset, train_num_each, val_dataset, val_num_each):
    num_train = len(train_dataset)
    num_val = len(val_dataset)

    train_useful_start_idx = get_useful_start_idx(sequence_length, train_num_each)

    val_useful_start_idx = get_useful_start_idx(sequence_length, val_num_each)

    num_train_we_use = len(train_useful_start_idx) // num_gpu * num_gpu
    num_val_we_use = len(val_useful_start_idx) // num_gpu * num_gpu
    # num_train_we_use = 800
    # num_val_we_use = 80

    train_we_use_start_idx = train_useful_start_idx[0:num_train_we_use]
    val_we_use_start_idx = val_useful_start_idx[0:num_val_we_use]

    train_idx = []
    for i in range(num_train_we_use):
        for j in range(sequence_length):
            train_idx.append(train_we_use_start_idx[i] + j)

    val_idx = []
    for i in range(num_val_we_use):
        for j in range(sequence_length):
            val_idx.append(val_we_use_start_idx[i] + j)

    num_train_all = len(train_idx)
    num_val_all = len(val_idx)

    print('num train start idx : {:6d}'.format(len(train_useful_start_idx)))
    print('last idx train start: {:6d}'.format(train_useful_start_idx[-1]))
    print('num of train dataset: {:6d}'.format(num_train))
    print('num of train we use : {:6d}'.format(num_train_we_use))
    print('num of all train use: {:6d}'.format(num_train_all))
    print('num valid start idx : {:6d}'.format(len(val_useful_start_idx)))
    print('last idx valid start: {:6d}'.format(val_useful_start_idx[-1]))
    print('num of valid dataset: {:6d}'.format(num_val))
    print('num of valid we use : {:6d}'.format(num_val_we_use))
    print('num of all valid use: {:6d}'.format(num_val_all))

    train_loader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        sampler=train_idx,
        num_workers=workers,
        pin_memory=False
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=val_batch_size,
        sampler=val_idx,
        num_workers=workers,
        pin_memory=False
    )

    model_old = multi_lstm()
    model_old = DataParallel(model_old)
    model_old.load_state_dict(torch.load(
        "cnn_lstm_1_epoch_25_length_10_opt_1_mulopt_1_flip_0_crop_1_batch_300_train1_9991_train2_9958_val1_9725_val2_8864.pth"))

    model = multi_lstm_p2t()
    model.share = model_old.module.share
    model.lstm = model_old.module.lstm
    model.fc = model_old.module.fc
    model.fc2 = model_old.module.fc2
    model.fc3 = model_old.module.fc3

    model = DataParallel(model)
    for param in model.module.fc_p2t.parameters():
        param.requires_grad = False
    model.module.fc_p2t.load_state_dict(torch.load(
        "fc_epoch_25_length_4_opt_1_mulopt_1_flip_0_crop_1_batch_800_train1_9951_train2_9713_val1_9686_val2_7867_p2t.pth"))

    if use_gpu:
        model = model.cuda()
        model.module.fc_p2t = model.module.fc_p2t.cuda()

    criterion_1 = nn.BCEWithLogitsLoss(size_average=False)
    criterion_2 = nn.CrossEntropyLoss(size_average=False)
    criterion_3 = nn.KLDivLoss(size_average=False)
    sigmoid_cuda = nn.Sigmoid().cuda()


    if multi_optim == 0:
        if optimizer_choice == 0:
            optimizer = optim.SGD([
                {'params': model.module.share.parameters()},
                {'params': model.module.lstm.parameters(), },
                {'params': model.module.fc.parameters()},
                {'params': model.module.fc2.parameters()},
                {'params': model.module.fc3.parameters()}],
                lr=learning_rate, momentum=momentum, dampening=dampening,
                weight_decay=weight_decay, nesterov=use_nesterov)
            if sgd_adjust_lr == 0:
                exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=sgd_step, gamma=sgd_gamma)
            elif sgd_adjust_lr == 1:
                exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
        elif optimizer_choice == 1:
            optimizer = optim.Adam([
                {'params': model.module.share.parameters()},
                {'params': model.module.lstm.parameters(), },
                {'params': model.module.fc.parameters()},
                {'params': model.module.fc2.parameters()},
                {'params': model.module.fc3.parameters()}], lr=learning_rate)
    elif multi_optim == 1:
        if optimizer_choice == 0:
            optimizer = optim.SGD([
                {'params': model.module.share.parameters()},
                {'params': model.module.lstm.parameters(), 'lr': learning_rate},
                {'params': model.module.fc.parameters(), 'lr': learning_rate},
                {'params': model.module.fc2.parameters(), 'lr': learning_rate},
                {'params': model.module.fc3.parameters(), 'lr': learning_rate},
            ], lr=learning_rate / 10, momentum=momentum, dampening=dampening,
                weight_decay=weight_decay, nesterov=use_nesterov)
            if sgd_adjust_lr == 0:
                exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=sgd_step, gamma=sgd_gamma)
            elif sgd_adjust_lr == 1:
                exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
        elif optimizer_choice == 1:
            optimizer = optim.Adam([
                {'params': model.module.share.parameters()},
                {'params': model.module.lstm.parameters(), 'lr': learning_rate},
                {'params': model.module.fc.parameters(), 'lr': learning_rate},
                {'params': model.module.fc2.parameters(), 'lr': learning_rate},
                {'params': model.module.fc3.parameters(), 'lr': learning_rate},
            ], lr=learning_rate / 10)

    best_model_wts = copy.deepcopy(model.state_dict())
    best_val_accuracy_1 = 0.0
    best_val_accuracy_2 = 0.0
    correspond_train_acc_1 = 0.0
    correspond_train_acc_2 = 0.0

    # Record 3 train accuracies, 3 valid accuracies, 3 train losses, and 3 valid losses - 12 values per epoch
    record_np = np.zeros([epochs, 12])

    for epoch in range(epochs):
        # np.random.seed(epoch)
        np.random.shuffle(train_we_use_start_idx)
        train_idx = []
        for i in range(num_train_we_use):
            for j in range(sequence_length):
                train_idx.append(train_we_use_start_idx[i] + j)

        train_loader = DataLoader(
            train_dataset,
            batch_size=train_batch_size,
            sampler=train_idx,
            num_workers=workers,
            pin_memory=False
        )

        model.train()
        train_loss_1 = 0.0
        train_loss_2 = 0.0
        train_loss_3 = 0.0
        train_corrects_1 = 0
        train_corrects_2 = 0
        train_corrects_3 = 0

        train_start_time = time.time()
        for data in train_loader:
            inputs, labels_1, labels_2 = data
            if use_gpu:
                inputs = Variable(inputs.cuda())
                labels_1 = Variable(labels_1.cuda())
                labels_2 = Variable(labels_2.cuda())
            else:
                inputs = Variable(inputs)
                labels_1 = Variable(labels_1)
                labels_2 = Variable(labels_2)

            optimizer.zero_grad()

            outputs_1, outputs_2, outputs_3 = model.forward(inputs)

            _, preds_2 = torch.max(outputs_2.data, 1)
            train_corrects_2 += torch.sum(preds_2 == labels_2.data)

            sig_output_1 = sigmoid_cuda(outputs_1)
            sig_output_3 = sigmoid_cuda(outputs_3)

            sig_average = (sig_output_1.data + sig_output_3.data) / 2

            preds_1 = torch.cuda.ByteTensor(sig_output_1.data > 0.5)
            preds_1 = preds_1.long()
            train_corrects_1 += torch.sum(preds_1 == labels_1.data)

            preds_3 = torch.cuda.ByteTensor(sig_average > 0.5)
            preds_3 = preds_3.long()
            train_corrects_3 += torch.sum(preds_3 == labels_1.data)

            labels_1 = Variable(labels_1.data.float())
            loss_1 = criterion_1(outputs_1, labels_1)
            loss_2 = criterion_2(outputs_2, labels_2)

            sig_output_3 = Variable(sig_output_3.data, requires_grad=False)
            loss_3 = torch.abs(criterion_3(sig_output_1, sig_output_3))
            loss = loss_1 + loss_2 + loss_3 * alpha
            loss.backward()
            optimizer.step()

            train_loss_1 += loss_1.data[0]
            train_loss_2 += loss_2.data[0]
            train_loss_3 += loss_3.data[0]

        train_elapsed_time = time.time() - train_start_time
        train_accuracy_1 = train_corrects_1 / num_train_all / 7
        train_accuracy_2 = train_corrects_2 / num_train_all
        train_accuracy_3 = train_corrects_3 / num_train_all / 7
        train_average_loss_1 = train_loss_1 / num_train_all / 7
        train_average_loss_2 = train_loss_2 / num_train_all
        train_average_loss_3 = train_loss_3 / num_train_all

        # begin eval

        model.eval()
        val_loss_1 = 0.0
        val_loss_2 = 0.0
        val_loss_3 = 0.0
        val_corrects_1 = 0
        val_corrects_2 = 0
        val_corrects_3 = 0

        val_start_time = time.time()
        for data in val_loader:
            inputs, labels_1, labels_2 = data
            labels_2 = labels_2[(sequence_length - 1):: sequence_length]
            if use_gpu:
                inputs = Variable(inputs.cuda(), volatile=True)
                labels_1 = Variable(labels_1.cuda(), volatile=True)
                labels_2 = Variable(labels_2.cuda(), volatile=True)
            else:
                inputs = Variable(inputs, volatile=True)
                labels_1 = Variable(labels_1, volatile=True)
                labels_2 = Variable(labels_2, volatile=True)

            outputs_1, outputs_2, outputs_3 = model.forward(inputs)
            outputs_2 = outputs_2[(sequence_length - 1):: sequence_length]
            _, preds_2 = torch.max(outputs_2.data, 1)
            val_corrects_2 += torch.sum(preds_2 == labels_2.data)

            sig_output_1 = sigmoid_cuda(outputs_1)
            sig_output_3 = sigmoid_cuda(outputs_3)

            sig_average = (sig_output_1.data + sig_output_3.data) / 2

            preds_1 = torch.cuda.ByteTensor(sig_output_1.data > 0.5)
            preds_1 = preds_1.long()
            val_corrects_1 += torch.sum(preds_1 == labels_1.data)

            preds_3 = torch.cuda.ByteTensor(sig_average > 0.5)
            preds_3 = preds_3.long()
            val_corrects_3 += torch.sum(preds_3 == labels_1.data)

            labels_1 = Variable(labels_1.data.float())
            loss_1 = criterion_1(outputs_1, labels_1)
            loss_2 = criterion_2(outputs_2, labels_2)

            sig_output_3 = Variable(sig_output_3.data, requires_grad=False)
            loss_3 = torch.abs(criterion_3(sig_output_1, sig_output_3))

            val_loss_1 += loss_1.data[0]
            val_loss_2 += loss_2.data[0]
            val_loss_3 += loss_3.data[0]

        val_elapsed_time = time.time() - val_start_time
        val_accuracy_1 = val_corrects_1 / (num_val_all * 7)
        val_accuracy_2 = val_corrects_2 / num_val_we_use
        val_accuracy_3 = val_corrects_3 / (num_val_all * 7)
        val_average_loss_1 = val_loss_1 / (num_val_all * 7)
        val_average_loss_2 = val_loss_2 / num_val_we_use
        val_average_loss_3 = val_loss_3 / num_val_all

        print('epoch: {:3d}'
              ' train time: {:2.0f}m{:2.0f}s'
              ' train accu_1: {:.4f}'
              ' train accu_3: {:.4f}'
              ' train accu_2: {:.4f}'
              ' train loss_1: {:4.4f}'
              ' train loss_2: {:4.4f}'
              ' train loss_3: {:4.4f}'
              .format(epoch,
                      train_elapsed_time // 60,
                      train_elapsed_time % 60,
                      train_accuracy_1,
                      train_accuracy_3,
                      train_accuracy_2,
                      train_average_loss_1,
                      train_average_loss_2,
                      train_average_loss_3))
        print('epoch: {:3d}'
              ' valid time: {:2.0f}m{:2.0f}s'
              ' valid accu_1: {:.4f}'
              ' valid accu_3: {:.4f}'
              ' valid accu_2: {:.4f}'
              ' valid loss_1: {:4.4f}'
              ' valid loss_2: {:4.4f}'
              ' valid loss_3: {:4.4f}'
              .format(epoch,
                      val_elapsed_time // 60,
                      val_elapsed_time % 60,
                      val_accuracy_1,
                      val_accuracy_3,
                      val_accuracy_2,
                      val_average_loss_1,
                      val_average_loss_2,
                      val_average_loss_3))

        if optimizer_choice == 0:
            if sgd_adjust_lr == 0:
                exp_lr_scheduler.step()
            elif sgd_adjust_lr == 1:
                exp_lr_scheduler.step(val_average_loss_1 + val_average_loss_2 + alpha * val_average_loss_3)

        if val_accuracy_2 > best_val_accuracy_2 and val_accuracy_1 > 0.95:
            best_val_accuracy_2 = val_accuracy_2
            best_val_accuracy_1 = val_accuracy_1
            correspond_train_acc_1 = train_accuracy_1
            correspond_train_acc_2 = train_accuracy_2
            best_model_wts = copy.deepcopy(model.state_dict())
        elif val_accuracy_2 == best_val_accuracy_2 and val_accuracy_1 > 0.95:
            if val_accuracy_1 > best_val_accuracy_1:
                correspond_train_acc_1 = train_accuracy_1
                correspond_train_acc_2 = train_accuracy_2
                best_model_wts = copy.deepcopy(model.state_dict())
            elif val_accuracy_1 == best_val_accuracy_1:
                if train_accuracy_2 > correspond_train_acc_2:
                    correspond_train_acc_2 = train_accuracy_2
                    correspond_train_acc_1 = train_accuracy_1
                    best_model_wts = copy.deepcopy(model.state_dict())
                elif train_accuracy_2 == correspond_train_acc_2:
                    if train_accuracy_1 > correspond_train_acc_1:
                        correspond_train_acc_1 = train_accuracy_1
                        best_model_wts = copy.deepcopy(model.state_dict())

        record_np[epoch, 0] = train_accuracy_1
        record_np[epoch, 1] = train_accuracy_3
        record_np[epoch, 2] = train_accuracy_2
        record_np[epoch, 3] = train_average_loss_1
        record_np[epoch, 4] = train_average_loss_2
        record_np[epoch, 5] = train_average_loss_3

        record_np[epoch, 6] = val_accuracy_1
        record_np[epoch, 7] = val_accuracy_3
        record_np[epoch, 8] = val_accuracy_2
        record_np[epoch, 9] = val_average_loss_1
        record_np[epoch, 10] = val_average_loss_2
        record_np[epoch, 11] = val_average_loss_3

    print('best accuracy_1: {:.4f} cor train accu_1: {:.4f}'.format(best_val_accuracy_1, correspond_train_acc_1))
    print('best accuracy_2: {:.4f} cor train accu_2: {:.4f}'.format(best_val_accuracy_2, correspond_train_acc_2))

    save_val_1 = int("{:4.0f}".format(best_val_accuracy_1 * 10000))
    save_val_2 = int("{:4.0f}".format(best_val_accuracy_2 * 10000))
    save_train_1 = int("{:4.0f}".format(correspond_train_acc_1 * 10000))
    save_train_2 = int("{:4.0f}".format(correspond_train_acc_2 * 10000))
    public_name = "cnn_lstm1_p2t" \
                  + "_epoch_" + str(epochs) \
                  + "_length_" + str(sequence_length) \
                  + "_opt_" + str(optimizer_choice) \
                  + "_mulopt_" + str(multi_optim) \
                  + "_flip_" + str(use_flip) \
                  + "_crop_" + str(crop_type) \
                  + "_batch_" + str(train_batch_size) \
                  + "_train1_" + str(save_train_1) \
                  + "_train2_" + str(save_train_2) \
                  + "_val1_" + str(save_val_1) \
                  + "_val2_" + str(save_val_2)
    model_name = public_name + ".pth"
    torch.save(best_model_wts, model_name)

    record_name = public_name + ".npy"
    np.save(record_name, record_np)
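# Note (assumption): record_np is not allocated in this function; the bookkeeping
# above writes 12 values per epoch, so it is assumed to have been created earlier
# with a shape like record_np = np.zeros([epochs, 12]), where columns 0-2 hold the
# train accuracies (1, 3, 2), 3-5 the train losses 1-3, 6-8 the valid accuracies
# (1, 3, 2) and 9-11 the valid losses 1-3.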
Example #9
def main():
    args = parse_args()

    # __________________ Params ___________________
    sample_size = args.sample_size
    train_batch_size = args.train_batch_size
    test_batch_size = 128
    num_epochs = args.num_epochs
    num_workers = args.num_worker
    lr = args.learning_rate
    start_epoch = 0
    # _____________________________________________

    manual_seed = random.randint(1, 10000)
    random.seed(manual_seed)
    torch.manual_seed(manual_seed)
    torch.cuda.manual_seed_all(manual_seed)
    torch.set_num_threads(num_workers + 1)
    cudnn.benchmark = True
    # cudnn.deterministic = False
    cudnn.enabled = True

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    coco_train = Coco(
        'datasets/data/coco_data/person_keypoints_train2017.json', 'train',
        sample_size, transforms.Compose([normalize]))
    coco_val = Coco('datasets/data/coco_data/person_keypoints_val2017.json',
                    'train', sample_size, transforms.Compose([normalize]))

    train_loader = DataLoader(coco_train,
                              batch_size=train_batch_size,
                              shuffle=True,
                              num_workers=num_workers,
                              pin_memory=False)
    val_loader = DataLoader(coco_val,
                            batch_size=test_batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=False)

    pose_net = bninception(out_chn=2)
    model = DataParallel(pose_net)
    model.cuda()

    #checkpoint = torch.load('models/m_100.pth')
    #pretrained_dict = checkpoint['state_dict']
    #model.load_state_dict(pretrained_dict)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    #os.makedirs(args.log, exist_ok=True)

    for epoch in range(start_epoch, num_epochs):
        if epoch == 100:
            for param_group in optimizer.param_groups:
                param_group['lr'] = 1e-4

        dloss, scale_loss = train(train_loader, model, optimizer)
        tloss = 'det_loss ' + str(dloss) + ' scale loss ' + str(scale_loss)

        dloss, scale_loss = train_test(val_loader, model)
        test_loss = 'det_loss ' + str(dloss) + ' scale loss ' + str(scale_loss)

        with open('losses/train_loss_384.txt', 'a') as the_file:
            the_file.write(str(tloss) + '\n')

        with open('losses/test_loss_384.txt', 'a') as the_file:
            the_file.write(str(test_loss) + '\n')

        ckpt = {
            'epoch': epoch,
            'optimizer': optimizer.state_dict(),
            'state_dict': model.state_dict()
        }

        #os.makedirs(args.model_save_path, exist_ok=True)
        #ckpt_name = os.path.join(args.model_save_path, 'epoch_%d.ckpt' % epoch)
        torch.save(ckpt, 'models/m_384_' + str(epoch) + '.pth')
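# Resume sketch (assumption: extends the commented-out loading code above; the
# file name is a placeholder for any checkpoint written by this loop, and it also
# restores the optimizer state and the epoch counter stored in the dict):
#     checkpoint = torch.load('models/m_384_<epoch>.pth')
#     model.load_state_dict(checkpoint['state_dict'])
#     optimizer.load_state_dict(checkpoint['optimizer'])
#     start_epoch = checkpoint['epoch'] + 1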
Example #10
File: train_first.py  Project: ganjf/NCRF
def run(args):
    with open(args.cfg_path) as f:
        cfg = json.load(f)

    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)

    batch_size_train = cfg['batch_size']
    batch_size_valid = cfg['batch_size'] * 3
    num_workers = args.num_workers
    grid_size = cfg['grid_size']

    logger = logging.getLogger("valid")
    logger.setLevel(logging.DEBUG)
    fileHandler = logging.FileHandler('valid.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fileHandler.setFormatter(formatter)
    logger.addHandler(fileHandler)

    model = MODELS[cfg['model']](num_nodes=grid_size, use_crf=cfg['use_crf'])
    model = DataParallel(model, device_ids=None)
    checkpoint = torch.load(args.load_path)
    model.load_state_dict(checkpoint['state_dict'])
    model = model.cuda()

    loss_fn = BCEWithLogitsLoss().cuda()
    optimizer = SGD(model.parameters(), lr=cfg['lr'], momentum=cfg['momentum'])

    dataset_train = GridPatchTrainDataset(csv_file=cfg['data_path_train'],
                                          root_dir=cfg['root_dir'])

    dataset_valid = GridPatchValidnDataset(csv_file=cfg['data_path_valid'],
                                           root_dir=cfg['root_dir'])

    dataloader_train = DataLoader(dataset_train,
                                  batch_size=batch_size_train,
                                  shuffle=True,
                                  num_workers=num_workers)
    dataloader_valid = DataLoader(dataset_valid,
                                  batch_size=batch_size_valid,
                                  num_workers=num_workers)

    summary_train = {'epoch': 0, 'step': 0}
    summary_valid = {'loss': float('inf'), 'acc': 0}
    summary_writer = SummaryWriter(comment='NCRF')
    loss_valid_best = 1000.0

    for epoch in range(cfg['epoch']):
        if epoch == 5:
            for param_group in optimizer.param_groups:
                param_group['lr'] = cfg['lr_1']
        if epoch == 10:
            for param_group in optimizer.param_groups:
                param_group['lr'] = cfg['lr_2']
        if epoch == 15:
            for param_group in optimizer.param_groups:
                param_group['lr'] = cfg['lr_3']

        summary_train = train_epoch(summary_train, summary_writer, cfg, model,
                                    loss_fn, optimizer, dataloader_train)
        fileName = 'train_' + str(summary_train['epoch']) + '.pkl'
        torch.save(
            {
                'epoch': summary_train['epoch'],
                'step': summary_train['step'],
                'state_dict': model.state_dict()
            }, os.path.join(args.save_path, fileName))

        time_now = time.time()
        summary_valid = valid_epoch(summary_valid, cfg, model, loss_fn,
                                    dataloader_valid)
        time_spent = time.time() - time_now

        logger.info('{}, Epoch : {}, Step : {}, Validation Loss : {:.5f}, '
                    'Validation Acc : {:.3f}, Run Time : {:.2f}'.format(
                        time.strftime("%Y-%m-%d %H:%M:%S"),
                        summary_train['epoch'], summary_train['step'],
                        summary_valid['loss'], summary_valid['acc'],
                        time_spent))

        summary_writer.add_scalar('valid/loss', summary_valid['loss'],
                                  summary_train['step'])
        summary_writer.add_scalar('valid/acc', summary_valid['acc'],
                                  summary_train['step'])

        if summary_valid['loss'] < loss_valid_best:
            loss_valid_best = summary_valid['loss']

            torch.save(
                {
                    'epoch': summary_train['epoch'],
                    'step': summary_train['step'],
                    'state_dict': model.state_dict()
                }, os.path.join(args.save_path, 'best.pkl'))

    summary_writer.close()
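# Note (sketch, not part of the original code): the manual lr drops at epochs
# 5/10/15 above could also be expressed with a scheduler, assuming cfg['lr_1'],
# cfg['lr_2'] and cfg['lr_3'] are plain numbers:
#     from torch.optim.lr_scheduler import LambdaLR
#     lrs = [cfg['lr'], cfg['lr_1'], cfg['lr_2'], cfg['lr_3']]
#     scheduler = LambdaLR(optimizer, lambda e: lrs[min(e // 5, 3)] / cfg['lr'])
#     # then call scheduler.step() once per epoch instead of editing param_groups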
Example #11
class Train:
    def __init__(self,
                 model,
                 data,
                 epoch,
                 batch_size,
                 loss,
                 loss_params,
                 ops_params,
                 lr=5e-4,
                 optimizer='adam',
                 mode='parallel',
                 continue_train=False,
                 save=None):
        self.model = model
        self.data = data
        self.epoch = epoch
        self.batch_size = batch_size
        self.loss = loss
        self.loss_params = loss_params
        self.ops_params = ops_params
        self.lr = lr
        self.optimizer = optimizer
        self.mode = mode
        self.continue_train = continue_train
        self.save = save

    def _train(self):
        if self.mode == 'gpu':
            device = torch.device('cuda', 0)
            if self.continue_train:
                self.model.load_state_dict(torch.load(self.save))
            self.model = self.model.to(device)
        elif self.mode == 'parallel':
            num_gpu = torch.cuda.device_count()
            self.model = DataParallel(self.model,
                                      device_ids=[i for i in range(num_gpu)])
            if self.continue_train:
                self.model.load_state_dict(torch.load(self.save))
            self.model = self.model.cuda()
        self.model = self.model.train()
        params = self.model.parameters()
        optimizer = self._create_optimizer()
        optimizer = optimizer(params, lr=self.lr, **self.ops_params)

        start_time = int(time.time())
        log = open('./logs/loggings/LaneNet_{}.txt'.format(start_time), 'w')
        step = 0
        for e_p in range(self.epoch):
            for batch_data in self.data['train']:
                s = time.time()
                input_data = batch_data[0]
                seg_mask = batch_data[1]
                instance_mask = batch_data[2]

                input_data = input_data.cuda()
                seg_mask = seg_mask.cuda()
                instance_mask = instance_mask.cuda()

                predictions, embeddings = self.model(input_data)
                total_loss = self.loss(self.batch_size, predictions, seg_mask,
                                       embeddings, instance_mask,
                                       **self.loss_params)
                total_loss, segmentation_loss, discriminative_loss = total_loss()
                log.write(
                    'Steps:{}, Total Loss:{}, Segmentation Loss:{}, Discriminative Loss:{}\n'
                    .format(step, total_loss, segmentation_loss,
                            discriminative_loss))
                log.flush()
                optimizer.zero_grad()
                total_loss.backward()
                clip_grad_value_(params, clip_value=5.)
                optimizer.step()
                step += 1
                e = time.time()
                print(
                    "step time:{}, seg_loss:{:.6f}, dis_loss:{:.6f}\n".format(
                        e - s, segmentation_loss, discriminative_loss))
            torch.save(
                self.model.state_dict(),
                os.path.join('./logs/models',
                             'model_1_{}_{}.pkl'.format(start_time, e_p)))
        log.close()

    def _create_optimizer(self):
        if self.optimizer == 'adam':
            return torch.optim.Adam
        elif self.optimizer == 'sgd':
            return torch.optim.SGD

    def __call__(self):
        self._train()
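# Usage sketch (assumption: the names below are hypothetical placeholders; the
# loss callable must follow the interface used in _train, i.e.
# loss(batch_size, predictions, seg_mask, embeddings, instance_mask, **loss_params)
# returns a callable yielding (total_loss, segmentation_loss, discriminative_loss)):
#     trainer = Train(model=lanenet_model, data={'train': train_loader},
#                     epoch=50, batch_size=8,
#                     loss=lanenet_loss, loss_params={}, ops_params={},
#                     optimizer='adam', mode='parallel',
#                     save='./logs/models/latest.pkl')
#     trainer()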
Example #12
def test_model(test_dataset, test_num_each):
    num_test = len(test_dataset)
    test_useful_start_idx = get_useful_start_idx(sequence_length,
                                                 test_num_each)

    num_test_we_use = len(test_useful_start_idx)
    # num_test_we_use = 804
    # num_test_we_use = len(test_useful_start_idx) // (test_batch_size // sequence_length) * (
    #     test_batch_size // sequence_length)

    test_we_use_start_idx = test_useful_start_idx[0:num_test_we_use]

    test_idx = []
    for i in range(num_test_we_use):
        for j in range(sequence_length):
            test_idx.append(test_we_use_start_idx[i] + j)

    num_test_all = len(test_idx)

    print('num test start idx : {:6d}'.format(len(test_useful_start_idx)))
    print('last idx test start: {:6d}'.format(test_useful_start_idx[-1]))
    print('num of test dataset: {:6d}'.format(num_test))
    print('num of test we use : {:6d}'.format(num_test_we_use))
    print('num of all test use: {:6d}'.format(num_test_all))

    test_loader = DataLoader(test_dataset,
                             batch_size=test_batch_size,
                             sampler=test_idx,
                             num_workers=workers,
                             pin_memory=False)

    model = resnet_lstm()
    model.load_state_dict(torch.load(model_name))
    model = model.module
    model = DataParallel(model)
    if use_gpu:
        model = model.cuda()
    # multi-GPU inference should work directly here
    # model = model.module            # needs to be verified
    criterion = nn.CrossEntropyLoss(size_average=False)

    model.eval()
    test_loss = 0.0
    test_corrects = 0
    test_start_time = time.time()

    all_preds = []

    for data in test_loader:
        inputs, labels_1, labels_2 = data
        labels_2 = labels_2[(sequence_length - 1)::sequence_length]
        if use_gpu:
            inputs = Variable(inputs.cuda(), volatile=True)
            labels = Variable(labels_2.cuda(), volatile=True)
        else:
            inputs = Variable(inputs, volatile=True)
            labels = Variable(labels_2, volatile=True)

        outputs = model.forward(inputs)
        outputs = outputs.view(-1, sequence_length, 7)
        outputs = torch.mean(outputs, 1)

        _, preds = torch.max(outputs.data, 1)
        for i in range(len(preds)):
            all_preds.append(preds[i])
        print(len(all_preds))
        loss = criterion(outputs, labels)
        test_loss += loss.data[0]
        test_corrects += torch.sum(preds == labels.data)

    test_elapsed_time = time.time() - test_start_time
    test_accuracy = test_corrects / num_test_we_use
    test_average_loss = test_loss / num_test_we_use

    print('type of all_preds:', type(all_preds))
    print('leng of all preds:', len(all_preds))

    with open(pred_name, 'wb') as f:
        pickle.dump(all_preds, f)
    print('test elapsed: {:2.0f}m{:2.0f}s'
          ' test loss: {:4.4f}'
          ' test accu: {:.4f}'.format(test_elapsed_time // 60,
                                      test_elapsed_time % 60,
                                      test_average_loss, test_accuracy))
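# Note: labels_2[(sequence_length - 1)::sequence_length] keeps one label (the
# last frame's) per clip, outputs.view(-1, sequence_length, 7) regroups the
# per-frame logits by clip, and torch.mean(outputs, 1) averages the 7-way logits
# over each sequence so that the argmax yields one prediction per clip.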
Example #13
    def train(self):
        torch.multiprocessing.set_sharing_strategy('file_system')

        path = self.args.data_path
        label_file = self.args.label_path
        self.logger.info('original train process')
        time_stamp_launch = time.strftime('%Y%m%d') + '-' + time.strftime(
            '%H%M')
        self.logger.info(path.split('/')[-2] + time_stamp_launch)
        best_acc = 0
        model_root = './model_' + path.split('/')[-2]
        if not os.path.exists(model_root):
            os.mkdir(model_root)
        cuda = True
        cudnn.benchmark = True
        batch_size = self.args.batchsize
        batch_size_g = batch_size * 2
        image_size = (224, 224)
        num_cls = self.args.num_class

        self.generator_epoch = self.args.generator_epoch
        self.warm_epoch = 10
        n_epoch = self.args.max_epoch
        weight_decay = 1e-6
        momentum = 0.9

        manual_seed = random.randint(1, 10000)
        random.seed(manual_seed)
        torch.manual_seed(manual_seed)

        #######################
        # load data           #
        #######################
        target_train = transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.RandomCrop((224, 224)),
            transforms.RandomHorizontalFlip(),
            AutoAugment(),
            transforms.ToTensor(),
            transforms.Normalize((0.435, 0.418, 0.396),
                                 (0.284, 0.308, 0.335)),  # grayscale mean/std
        ])

        dataset_train = visDataset_target(path,
                                          label_file,
                                          train=True,
                                          transform=target_train)

        dataloader_train = torch.utils.data.DataLoader(dataset=dataset_train,
                                                       batch_size=batch_size,
                                                       shuffle=True,
                                                       num_workers=3)
        transform_test = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize((0.435, 0.418, 0.396),
                                 (0.284, 0.308, 0.335)),  # grayscale mean/std
        ])

        test_dataset = visDataset_target(path,
                                         label_file,
                                         train=True,
                                         transform=transform_test)
        test_loader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=3)

        #####################
        #  load model       #
        #####################
        self.lemniscate = LinearAverage(2048, test_dataset.__len__(), 0.05,
                                        0.00).cuda()
        self.elr_loss = elr_loss(num_examp=test_dataset.__len__(),
                                 num_classes=12).cuda()

        generator = generator_fea_deconv(class_num=num_cls)

        discriminator = Discriminator_fea()
        source_net = torch.load(self.args.source_model_path)
        source_classifier = Classifier(num_classes=num_cls)
        fea_contrastor = contrastor()

        # load pre-trained source classifier
        fc_dict = source_classifier.state_dict()
        pre_dict = source_net.state_dict()
        pre_dict = {k: v for k, v in pre_dict.items() if k in fc_dict}
        fc_dict.update(pre_dict)
        source_classifier.load_state_dict(fc_dict)

        generator = DataParallel(generator, device_ids=[0, 1])
        discriminator = DataParallel(discriminator, device_ids=[0, 1])
        fea_contrastor = DataParallel(fea_contrastor, device_ids=[0, 1])
        source_net = DataParallel(source_net, device_ids=[0, 1])
        source_classifier = DataParallel(source_classifier, device_ids=[0, 1])
        source_classifier.eval()

        for p in generator.parameters():
            p.requires_grad = True
        for p in source_net.parameters():
            p.requires_grad = True

        # freezing the source classifier
        for name, value in source_net.named_parameters():
            if name[:9] == 'module.fc':
                value.requires_grad = False

        # setup optimizer
        params = filter(lambda p: p.requires_grad, source_net.parameters())
        discriminator_group = []
        for k, v in discriminator.named_parameters():
            discriminator_group += [{'params': v, 'lr': self.lr * 3}]

        model_params = []
        for v in params:
            model_params += [{'params': v, 'lr': self.lr}]

        contrastor_para = []
        for k, v in fea_contrastor.named_parameters():
            contrastor_para += [{'params': v, 'lr': self.lr * 5}]

        #####################
        # setup optimizer   #
        #####################

        # only train the extractor
        optimizer = optim.SGD(model_params + discriminator_group +
                              contrastor_para,
                              momentum=momentum,
                              weight_decay=weight_decay)
        optimizer_g = optim.SGD(generator.parameters(),
                                lr=self.lr,
                                momentum=momentum,
                                weight_decay=weight_decay)

        loss_gen_ce = torch.nn.CrossEntropyLoss()

        if cuda:
            source_net = source_net.cuda()
            generator = generator.cuda()
            discriminator = discriminator.cuda()
            fea_contrastor = fea_contrastor.cuda()
            loss_gen_ce = loss_gen_ce.cuda()
            source_classifier = source_classifier.cuda()

        #############################
        # training network          #
        #############################

        len_dataloader = len(dataloader_train)
        self.logger.info('the step of one epoch: ' + str(len_dataloader))

        current_step = 0
        for epoch in range(n_epoch):
            source_net.train()
            discriminator.train()
            fea_contrastor.train()

            data_train_iter = iter(dataloader_train)

            if epoch < self.generator_epoch:
                generator.train()
                self.train_prototype_generator(epoch, batch_size_g, num_cls,
                                               optimizer_g, generator,
                                               source_classifier, loss_gen_ce)

            if epoch >= self.generator_epoch:
                if epoch == self.generator_epoch:
                    torch.save(
                        generator, model_root + '/generator_' +
                        path.split('/')[-2] + '.pkl')

                # prototype generation
                generator.eval()
                z = Variable(torch.rand(self.args.num_class * 2, 100)).cuda()

                # Get labels ranging from 0 to n_classes for n rows
                label_t = torch.linspace(0, num_cls - 1, steps=num_cls).long()
                for ti in range(self.args.num_class * 2 // num_cls - 1):
                    label_t = torch.cat([
                        label_t,
                        torch.linspace(0, num_cls - 1, steps=num_cls).long()
                    ])
                labels = Variable(label_t).cuda()
                z = z.contiguous()
                labels = labels.contiguous()
                images = generator(z, labels)

                self.alpha = 0.9 - (epoch - self.generator_epoch) / (
                    n_epoch - self.generator_epoch) * 0.2

                # obtain the target pseudo label and confidence weight
                pseudo_label, pseudo_label_acc, all_indx, confidence_weight = self.obtain_pseudo_label_and_confidence_weight(
                    test_loader, source_net)

                i = 0
                while i < len_dataloader:
                    ###################################
                    #      prototype adaptation       #
                    ###################################
                    p = float(i +
                              (epoch - self.generator_epoch) * len_dataloader
                              ) / (n_epoch -
                                   self.generator_epoch) / len_dataloader
                    self.p = 2. / (1. + np.exp(-10 * p)) - 1
                    data_target_train = next(data_train_iter)
                    s_img, s_label, s_indx = data_target_train

                    batch_size_s = len(s_label)

                    input_img_s = torch.FloatTensor(batch_size_s, 3,
                                                    image_size[0],
                                                    image_size[1])
                    class_label_s = torch.LongTensor(batch_size_s)

                    if cuda:
                        s_img = s_img.cuda()
                        s_label = s_label.cuda()
                        input_img_s = input_img_s.cuda()
                        class_label_s = class_label_s.cuda()

                    input_img_s.resize_as_(s_img).copy_(s_img)
                    class_label_s.resize_as_(s_label).copy_(s_label)
                    target_inputv_img = Variable(input_img_s)
                    target_classv_label = Variable(class_label_s)

                    # learning rate decay
                    optimizer = self.exp_lr_scheduler(optimizer=optimizer,
                                                      step=current_step)

                    loss, contrastive_loss = self.adaptation_step(
                        target_inputv_img, pseudo_label, images.detach(),
                        labels, s_indx.numpy(), source_net, discriminator,
                        fea_contrastor, optimizer, epoch,
                        confidence_weight.float())

                    # visualization on tensorboard
                    self.writer.add_scalar('contrastive_loss',
                                           contrastive_loss,
                                           global_step=current_step)
                    self.writer.add_scalar('overall_loss',
                                           loss,
                                           global_step=current_step)
                    self.writer.add_scalar('pseudo_label_acc',
                                           pseudo_label_acc,
                                           global_step=current_step)

                    i += 1
                    current_step += 1

                self.logger.info('epoch: %d' % epoch)
                self.logger.info('contrastive_loss: %f' % (contrastive_loss))
                self.logger.info('loss: %f' % loss)
                accu, ac_list = val_pclass(source_net, test_loader)
                self.writer.add_scalar('test_acc',
                                       accu,
                                       global_step=current_step)
                self.logger.info(ac_list)
                if accu >= best_acc:
                    self.logger.info('saving the best model!')
                    torch.save(
                        source_net, model_root + '/' + time_stamp_launch +
                        '_best_model_' + path.split('/')[-2] + '.pkl')
                    best_acc = accu

                self.logger.info('acc is : %.04f, best acc is : %.04f' %
                                 (accu, best_acc))
                self.logger.info(
                    '================================================')

        self.logger.info('training done! ! !')
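# Note: self.p above follows the schedule p = 2 / (1 + exp(-10 * progress)) - 1,
# commonly used as an adaptation factor in domain-adaptation training, ramping
# from 0 towards 1 over the adaptation epochs, while self.alpha decays linearly
# from 0.9 to 0.7 after the generator warm-up phase.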
Example #14
def main():
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, f'fold{args.fold}')
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.train.txt'), mode='a')

    model_out_dir = opj(RESULT_DIR, 'models', args.out_dir, f'fold{args.fold}')
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(
        model_out_dir))
    if not ope(model_out_dir):
        os.makedirs(model_out_dir)

    # set cuda visible device
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
    cudnn.benchmark = True

    # set random seeds
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    np.random.seed(0)

    model_params = {}
    model_params['architecture'] = args.arch
    model = init_network(model_params)

    # move network to gpu
    model = DataParallel(model)
    model.cuda()

    if args.ema:
        ema_model = copy.deepcopy(model)
        ema_model.cuda()
    else:
        ema_model = None

    # define loss function (criterion)
    try:
        criterion = eval(args.loss)().cuda()
    except:
        raise (RuntimeError("Loss {} not available!".format(args.loss)))

    start_epoch = 0
    best_epoch = 0
    best_dice = 0
    best_dice_arr = np.zeros(3)

    # define scheduler
    try:
        scheduler = eval(args.scheduler)()
    except:
        raise (RuntimeError("Scheduler {} not available!".format(
            args.scheduler)))
    optimizer = scheduler.schedule(model, start_epoch, args.epochs)[0]

    # optionally resume from a checkpoint
    if args.resume:
        model_fpath = os.path.join(model_out_dir, args.resume)
        if os.path.isfile(model_fpath):
            # load checkpoint weights and update model and optimizer
            log.write(">> Loading checkpoint:\n>> '{}'\n".format(model_fpath))

            checkpoint = torch.load(model_fpath)
            start_epoch = checkpoint['epoch']
            best_epoch = checkpoint['best_epoch']
            best_dice_arr = checkpoint['best_dice_arr']
            best_dice = np.max(best_dice_arr)
            model.module.load_state_dict(checkpoint['state_dict'])

            optimizer_fpath = model_fpath.replace('.pth', '_optim.pth')
            if ope(optimizer_fpath):
                log.write(">> Loading checkpoint:\n>> '{}'\n".format(
                    optimizer_fpath))
                optimizer.load_state_dict(
                    torch.load(optimizer_fpath)['optimizer'])

            if args.ema:
                ema_model_fpath = model_fpath.replace('.pth', '_ema.pth')
                if ope(ema_model_fpath):
                    log.write(">> Loading checkpoint:\n>> '{}'\n".format(
                        ema_model_fpath))
                    ema_model.module.load_state_dict(
                        torch.load(ema_model_fpath)['state_dict'])
            log.write(">>>> loaded checkpoint:\n>>>> '{}' (epoch {})\n".format(
                model_fpath, checkpoint['epoch']))
        else:
            log.write(">> No checkpoint found at '{}'\n".format(model_fpath))

    # Data loading code
    train_transform = eval(args.train_transform)
    steel_df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
    steel_df['ImageId'], steel_df['ClassId'] = zip(
        *steel_df['ImageId_ClassId'].apply(lambda x: x.split('_')))
    steel_df = pd.pivot_table(steel_df,
                              index='ImageId',
                              columns='ClassId',
                              values='EncodedPixels',
                              aggfunc=lambda x: x,
                              dropna=False)
    steel_df = steel_df.reset_index()
    steel_df.columns = [str(i) for i in steel_df.columns.values]
    steel_df['class_count'] = steel_df[['1', '2', '3', '4']].count(axis=1)
    steel_df['split_label'] = steel_df[['1', '2', '3', '4', 'class_count'
                                        ]].apply(lambda x: make_split_label(x),
                                                 axis=1)
    train_idx, valid_idx, _, _ = train_test_split(steel_df.index,
                                                  steel_df['split_label'],
                                                  test_size=0.2,
                                                  random_state=43)

    train_dataset = SteelDataset(
        steel_df.iloc[train_idx],
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=train_transform,
        return_label=True,
        dataset='train',
    )
    if args.is_balance:
        train_sampler = BalanceClassSampler(
            train_dataset, args.sample_times * len(train_dataset))
    else:
        train_sampler = RandomSampler(train_dataset)

    train_loader = DataLoader(
        train_dataset,
        sampler=train_sampler,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        pin_memory=True,
    )
    # valid_split_file = opj(DATA_DIR, args.split_type, args.split_name, 'random_valid_cv%d.csv' % args.fold)
    valid_dataset = SteelDataset(
        steel_df.iloc[valid_idx],
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=None,
        return_label=True,
        dataset='val',
    )
    valid_loader = DataLoader(valid_dataset,
                              sampler=SequentialSampler(valid_dataset),
                              batch_size=max(int(args.batch_size // 2), 1),
                              drop_last=False,
                              num_workers=args.workers,
                              pin_memory=True)

    log.write('** start training here! **\n')
    log.write('\n')
    log.write(
        'epoch    iter      rate     | smooth_loss/dice | valid_loss/dice | best_epoch/best_score |  min \n'
    )
    log.write(
        '------------------------------------------------------------------------------------------------\n'
    )
    start_epoch += 1
    for epoch in range(start_epoch, args.epochs + 1):
        end = time.time()

        # set manual seeds per epoch
        np.random.seed(epoch)
        torch.manual_seed(epoch)
        torch.cuda.manual_seed_all(epoch)

        # adjust learning rate for each epoch
        lr_list = scheduler.step(model, epoch, args.epochs)
        lr = lr_list[0]

        # train for one epoch on train set
        iter, train_loss, train_dice = train(train_loader,
                                             model,
                                             ema_model,
                                             criterion,
                                             optimizer,
                                             epoch,
                                             args,
                                             lr=lr)

        with torch.no_grad():
            if args.ema:
                valid_loss, valid_dice = validate(valid_loader, ema_model,
                                                  criterion, epoch)
            else:
                valid_loss, valid_dice = validate(valid_loader, model,
                                                  criterion, epoch)

        # remember best loss and save checkpoint
        is_best = valid_dice >= best_dice
        if is_best:
            best_epoch = epoch
            best_dice = valid_dice

        if args.ema:
            save_top_epochs(model_out_dir,
                            ema_model,
                            best_dice_arr,
                            valid_dice,
                            best_epoch,
                            epoch,
                            best_dice,
                            ema=True)
        best_dice_arr = save_top_epochs(model_out_dir,
                                        model,
                                        best_dice_arr,
                                        valid_dice,
                                        best_epoch,
                                        epoch,
                                        best_dice,
                                        ema=False)

        print('\r', end='', flush=True)
        log.write('%5.1f   %5d    %0.6f   |  %0.4f  %0.4f  |  %0.4f  %6.4f |  %6.1f     %6.4f    | %3.1f min \n' % \
                  (epoch, iter + 1, lr, train_loss, train_dice, valid_loss, valid_dice,
                   best_epoch, best_dice, (time.time() - end) / 60))

        model_name = '%03d' % epoch
        if args.ema:
            save_model(ema_model,
                       model_out_dir,
                       epoch,
                       model_name,
                       best_dice_arr,
                       is_best=is_best,
                       optimizer=optimizer,
                       best_epoch=best_epoch,
                       best_dice=best_dice,
                       ema=True)
        save_model(model,
                   model_out_dir,
                   epoch,
                   model_name,
                   best_dice_arr,
                   is_best=is_best,
                   optimizer=optimizer,
                   best_epoch=best_epoch,
                   best_dice=best_dice,
                   ema=False)
Example #15
def train(args, model, optimizer, dataloader, dataloader_val, dataset_size,
          num_epochs, save_path):
    best_acc = float('inf')
    logging.basicConfig(level=logging.INFO,
                        format='%(message)s',
                        filename='log/regAASCE.log',
                        filemode='a')
    logging.info('regAASCE using DenseNet\n')

    gpu_nums = len(args.gpu_devices.split(','))
    print('Using %d gpus' % gpu_nums)
    if gpu_nums > 1:
        model = DataParallel(model)
    model.cuda()

    for epoch in range(num_epochs):
        s1 = time.time()
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            train_num = 0
            if phase == 'train':
                with tqdm(total=dataset_size,
                          desc=f'Epoch{epoch + 1}/{num_epochs}',
                          unit='img') as pbar:
                    for i_batch, sample_batched in enumerate(dataloader):
                        inputs = Variable(
                            sample_batched['image'].float()).cuda()
                        label = Variable(sample_batched['angle']).cuda()
                        img_name = sample_batched['name']

                        optimizer.zero_grad()
                        # forward
                        outputs = model(inputs)
                        train_num += len(inputs)
                        loss = torch.mean(torch.abs(label - outputs))
                        # backward
                        loss.backward()
                        optimizer.step()
                        pbar.set_postfix(**{'loss (batch)': loss.item()})
                        pbar.update(sample_batched['image'].shape[0])

                        running_loss += loss.item() * inputs.size(0)

                    epoch_loss = running_loss / dataset_size
                    print('epoch: %d, loss %.5f' % (epoch, epoch_loss))
                    s2 = time.time()
                    print('Train complete in %.0f m %.0f s' % ((s2 - s1) // 60,
                                                               (s2 - s1) % 60))
                    logging.info(
                        f'{epoch + 1}/{num_epochs} loss: {epoch_loss}')

            else:
                error = val(model, dataloader_val, args)
                printline = 'Mean error: %.4f' % error
                print(printline)
                logging.info(printline)

                if error < best_acc:
                    best_acc = error
                    if gpu_nums > 1:
                        torch.save(
                            model.module.state_dict(),
                            os.path.join(
                                save_path,
                                '3DUnet_%d_%.4f.pth' % (epoch + 1, best_acc)))
                    else:
                        torch.save(
                            model.state_dict(),
                            os.path.join(
                                save_path,
                                '3DUnet_%d_%.4f.pth' % (epoch + 1, best_acc)))
Example #16
def main():
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, 'fold%d' % args.fold)
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.submit.txt'), mode='a')

    if args.ema:
        network_path = opj(RESULT_DIR, 'models', args.out_dir,
                           'fold%d' % args.fold,
                           '%s_ema.pth' % args.predict_epoch)
    else:
        network_path = opj(RESULT_DIR, 'models', args.out_dir,
                           'fold%d' % args.fold, '%s.pth' % args.predict_epoch)

    submit_out_dir = opj(RESULT_DIR, 'submissions', args.out_dir,
                         'fold%d' % args.fold, 'epoch_%s' % args.predict_epoch)
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(
        submit_out_dir))
    if not ope(submit_out_dir):
        os.makedirs(submit_out_dir)

    # setting up the visible GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    args.augment = args.augment.split(',')
    for augment in args.augment:
        if augment not in augment_list:
            raise ValueError(
                'Unsupported or unknown test augmentation: {}!'.format(
                    augment))

    model_params = {}
    model_params['architecture'] = args.arch
    model = init_network(model_params)

    log.write(">> Loading network:\n>>>> '{}'\n".format(network_path))
    checkpoint = torch.load(network_path)
    model.load_state_dict(checkpoint['state_dict'])
    log.write(">>>> loaded network:\n>>>> epoch {}\n".format(
        checkpoint['epoch']))

    # moving network to gpu and eval mode
    model = DataParallel(model)
    model.cuda()
    model.eval()

    # Data loading code
    dataset = args.dataset
    if dataset == 'train':
        test_split_file = opj(DATA_DIR, args.split_type, 'train.csv')
    elif dataset == 'test':
        test_split_file = opj(DATA_DIR, args.split_type, 'test.csv')
    elif dataset == 'val':
        test_split_file = opj(DATA_DIR, args.split_type, args.split_name,
                              'random_valid_cv%d.csv' % args.fold)
    elif dataset == 'nih':
        test_split_file = opj(DATA_DIR, args.split_type, 'nih_112120.csv')
    elif dataset == 'chexpert':
        test_split_file = opj(DATA_DIR, args.split_type, 'chexpert_188521.csv')
    else:
        raise ValueError('Unsupported or unknown dataset: {}!'.format(dataset))
    test_dataset = SiimDataset(
        test_split_file,
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=None,
        return_label=False,
        crop_version=args.crop_version,
        dataset=args.dataset,
        predict_pos=args.predict_pos,
    )
    test_loader = DataLoader(
        test_dataset,
        sampler=SequentialSampler(test_dataset),
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    for augment in args.augment:
        test_loader.dataset.transform = eval('augment_%s' % augment)
        unaugment_func = eval('unaugment_%s' % augment)
        sub_submit_out_dir = opj(submit_out_dir, augment)
        if not ope(sub_submit_out_dir):
            os.makedirs(sub_submit_out_dir)
        with torch.no_grad():
            predict(test_loader,
                    model,
                    sub_submit_out_dir,
                    dataset,
                    args,
                    unaugment_func=unaugment_func)
Example #17
def main():
    args = parser.parse_args()

    log_out_dir = os.path.join(RESULT_DIR, 'logs', args.out_dir,
                               'fold%d' % args.fold)
    if not os.path.exists(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(os.path.join(log_out_dir, 'log.train.txt'), mode='a')

    model_out_dir = os.path.join(RESULT_DIR, 'models', args.out_dir,
                                 'fold%d' % args.fold)
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(
        model_out_dir))
    if not os.path.exists(model_out_dir):
        os.makedirs(model_out_dir)

    # set cuda visible device
    if not args.all_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
    cudnn.benchmark = True
    # cudnn.enabled = False

    # set random seeds
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    np.random.seed(0)

    model_params = {}
    model_params['architecture'] = args.arch
    model_params['num_classes'] = 1
    model_params['in_channels'] = args.in_channels
    if 'efficientnet' in args.arch:
        model_params['image_size'] = args.img_size
        model_params['encoder'] = args.effnet_encoder

    model = init_network(model_params)

    if args.load_state_dict_path is not None:
        if args.load_state_dict_path == 'use-img-level-densenet-ckpt':
            model_dir = '../output/models/densenet121_1024_all_data__obvious_neg__gradaccum_20__start_lr_3e6'
            pretrained_ckpt_path = os.path.join(f'{model_dir}',
                                                f'fold{args.fold}',
                                                'final.pth')
        else:
            pretrained_ckpt_path = args.load_state_dict_path
        init_pretrained = torch.load(pretrained_ckpt_path)
        if args.load_as_is:
            model.load_state_dict(init_pretrained['state_dict'])
        else:
            model.load_state_dict({
                key: (val if key not in {'logit.weight', 'logit.bias'} else
                      torch.rand([1, 1024] if key == 'logit.weight' else [1]))
                for key, val in init_pretrained['state_dict'].items()
            })
            torch.nn.init.xavier_uniform_(model.logit.weight)

    if args.all_gpus:
        model = DataParallel(model)
    model.cuda()

    # define loss function (criterion)
    try:
        criterion = eval(args.loss)().cuda()
    except:
        raise (RuntimeError("Loss {} not available!".format(args.loss)))

    start_epoch = 0
    best_loss = 1e5
    best_epoch = 0
    best_val_pr_auc_score = 0

    # define scheduler
    try:
        scheduler = eval(args.scheduler)(
            scheduler_lr_multiplier=args.scheduler_lr_multiplier,
            scheduler_epoch_offset=args.scheduler_epoch_offset)
    except:
        raise (RuntimeError("Scheduler {} not available!".format(
            args.scheduler)))
    optimizer = scheduler.schedule(model, start_epoch, args.epochs)[0]

    # Data loading code
    train_transform = train_multi_augment2

    with open('../input/imagelevel_folds_obvious_staining_5.pkl', 'rb') as f:
        folds = pickle.load(f)
    fold = args.fold
    trn_img_paths, val_img_paths = folds[fold]

    train_df = get_train_df_ohe(clean_from_duplicates=True)
    basepath_2_ohe_vector = {
        img: vec
        for img, vec in zip(train_df['img_base_path'],
                            train_df.iloc[:, 2:].values)
    }

    public_hpa_df_17 = get_public_df_ohe(clean_from_duplicates=True)
    public_basepath_2_ohe_vector = {
        img_path: vec
        for img_path, vec in zip(public_hpa_df_17['img_base_path'],
                                 public_hpa_df_17.iloc[:, 2:].values)
    }
    basepath_2_ohe_vector.update(public_basepath_2_ohe_vector)

    available_paths = set(
        np.concatenate((train_df['img_base_path'].values,
                        public_hpa_df_17['img_base_path'].values)))
    trn_img_paths = [path for path in trn_img_paths if path in available_paths]
    val_img_paths = [path for path in val_img_paths if path in available_paths]
    labels_df = pd.read_hdf(args.cell_level_labels_path)

    # modifying minor class labels
    cherrypicked_mitotic_spindle = pd.read_csv(
        '../input/mitotic_cells_selection.csv')

    cherrypicked_mitotic_spindle_img_cell = set(
        cherrypicked_mitotic_spindle[['ID', 'cell_i']].apply(tuple,
                                                             axis=1).values)

    cherrypicked_mitotic_spindle_img_cell = {
        (img, cell_i - 1)
        for img, cell_i in cherrypicked_mitotic_spindle_img_cell
    }

    class_names = get_class_names()
    mitotic_spindle_class_i = class_names.index('Mitotic spindle')

    cherrypicked_mitotic_spindle_based_on_nn = pd.read_csv(
        '../input/mitotic_pos_nn_added.csv')
    cherrypicked_mitotic_spindle_img_cell.update(
        set(cherrypicked_mitotic_spindle_based_on_nn[['ID', 'cell_i'
                                                      ]].apply(tuple,
                                                               axis=1).values))
    mitotic_bool_idx = labels_df.index.isin(
        cherrypicked_mitotic_spindle_img_cell)

    negative_img_ids_cell = labels_df.index[np.logical_not(
        mitotic_bool_idx)].values

    dfs = []
    for fold in range(5):
        dfs.append(pd.read_csv(f'../output/mitotic_pred_fold_{fold}.csv'))
    pred_df = pd.concat(dfs)
    pred_df.set_index(['ID', 'cell_i'], inplace=True)
    positive_img_ids_cell = pred_df.index[pred_df['pred'] < 0.6].values

    if args.ignore_negative:
        raise NotImplementedError

    train_dataset = ProteinMitoticDatasetCellSeparateLoading(
        trn_img_paths,
        positive_img_ids_cell,
        negative_img_ids_cell,
        in_channels=args.in_channels,
        transform=train_transform,
        target_raw_img_size=args.target_raw_img_size)
    train_loader = DataLoader(
        train_dataset,
        sampler=MitoticBalancingSubSampler(train_dataset.img_ids_cell,
                                           train_dataset.id_cell_2_y),
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    valid_dataset = ProteinMitoticDatasetCellSeparateLoading(
        val_img_paths,
        positive_img_ids_cell,
        sample(list(negative_img_ids_cell), 10000),
        img_size=args.img_size,
        in_channels=args.in_channels,
        target_raw_img_size=args.target_raw_img_size)
    valid_loader = DataLoader(valid_dataset,
                              sampler=SequentialSampler(valid_dataset),
                              batch_size=args.batch_size,
                              drop_last=False,
                              num_workers=args.workers,
                              pin_memory=True)

    log.write('** start training here! **\n')
    log.write('\n')
    log.write(
        'epoch    iter      rate     |  train_loss/acc  |    valid_loss/acc/pr_auc/---     |best_epoch/best_pr_auc|  min \n'
    )
    log.write(
        '-----------------------------------------------------------------------------------------------------------------\n'
    )
    start_epoch += 1

    if args.eval_at_start:
        with torch.no_grad():
            valid_loss, valid_acc, val_pr_auc_score = validate(
                valid_loader, model, criterion, -1, log)
        print('\r', end='', flush=True)
        log.write(
            '%5.1f   %5d    %0.6f   |  %0.4f  %0.4f  |    %0.4f  %6.4f %6.4f %6.1f  |    %6.4f  %6.4f   | %3.1f min \n' % \
            (-1, -1, -1, -1, -1, valid_loss, valid_acc, val_pr_auc_score, -1,
                   best_epoch, -1, -1))

    for epoch in range(start_epoch, args.epochs + 1):
        end = time.time()

        # set manual seeds per epoch
        np.random.seed(epoch)
        torch.manual_seed(epoch)
        torch.cuda.manual_seed_all(epoch)

        # adjust learning rate for each epoch
        lr_list = scheduler.step(model, epoch, args.epochs)
        lr = lr_list[0]

        # train for one epoch on train set
        iter, train_loss, train_acc = train(
            train_loader,
            model,
            criterion,
            optimizer,
            epoch,
            clipnorm=args.clipnorm,
            lr=lr,
            agg_steps=args.gradient_accumulation_steps)

        with torch.no_grad():
            valid_loss, valid_acc, val_pr_auc_score = validate(
                valid_loader, model, criterion, epoch, log)

        # remember best loss and save checkpoint
        is_best = val_pr_auc_score > best_val_pr_auc_score
        best_loss = min(valid_loss, best_loss)
        best_epoch = epoch if is_best else best_epoch
        best_val_pr_auc_score = val_pr_auc_score if is_best else best_val_pr_auc_score

        print('\r', end='', flush=True)
        log.write('%5.1f   %5d    %0.6f   |  %0.4f  %0.4f  |    %0.4f  %6.4f %6.4f  %6.1f |  %6.4f  %6.4f | %3.1f min \n' % \
                  (epoch, iter + 1, lr, train_loss, train_acc, valid_loss, valid_acc, val_pr_auc_score, -1,
                   best_epoch, best_val_pr_auc_score, (time.time() - end) / 60))

        save_model(model,
                   is_best,
                   model_out_dir,
                   optimizer=optimizer,
                   epoch=epoch,
                   best_epoch=best_epoch,
                   best_map=best_val_pr_auc_score)
Example #18
class Session:
    def __init__(self, dt_split):
        torch.manual_seed(66)
        torch.cuda.manual_seed_all(66)
        torch.cuda.set_device(settings.DEVICE)

        self.log_dir = settings.LOG_DIR
        self.model_dir = settings.MODEL_DIR
        ensure_dir(self.log_dir)
        ensure_dir(self.model_dir)
        logger.info('set log dir as %s' % self.log_dir)
        logger.info('set model dir as %s' % self.model_dir)

        self.step = 1
        self.writer = SummaryWriter(osp.join(self.log_dir, 'train.events'))
        dataset = TrainDataset(split=dt_split)
        self.dataloader = DataLoader(dataset,
                                     batch_size=settings.BATCH_SIZE,
                                     pin_memory=True,
                                     num_workers=settings.NUM_WORKERS,
                                     shuffle=True,
                                     drop_last=True)

        self.crit = nn.CrossEntropyLoss(ignore_index=settings.IGNORE_LABEL, \
            reduction='mean')

        self.net = EMANet(settings.N_CLASSES, settings.N_LAYERS)
        self.opt = SGD(params=[{
            'params': get_params(self.net, key='1x'),
            'lr': 1 * settings.LR,
            'weight_decay': settings.WEIGHT_DECAY,
        }, {
            'params': get_params(self.net, key='1y'),
            'lr': 1 * settings.LR,
            'weight_decay': 0,
        }, {
            'params': get_params(self.net, key='2x'),
            'lr': 2 * settings.LR,
            'weight_decay': 0.0,
        }],
                       momentum=settings.LR_MOM)

        self.net = DataParallel(self.net, device_ids=settings.DEVICES)
        patch_replication_callback(self.net)
        self.net = self.net.cuda()

    def write(self, out):
        for k, v in out.items():
            self.writer.add_scalar(k, v, self.step)

        out['lr'] = self.opt.param_groups[0]['lr']
        out['step'] = self.step
        outputs = ['{}: {:.4g}'.format(k, v) for k, v in out.items()]
        logger.info(' '.join(outputs))

    def save_checkpoints(self, name):
        ckp_path = osp.join(self.model_dir, name)
        obj = {
            'net': self.net.module.state_dict(),
            'step': self.step,
        }
        torch.save(obj, ckp_path)

    def load_checkpoints(self, name):
        ckp_path = osp.join(self.model_dir, name)
        try:
            obj = torch.load(ckp_path,
                             map_location=lambda storage, loc: storage.cuda())
            logger.info('Load checkpoint %s' % ckp_path)
        except FileNotFoundError:
            logger.error('No checkpoint %s!' % ckp_path)
            return

        self.net.module.load_state_dict(obj['net'])
        self.step = obj['step']

    def train_batch(self, image, label):
        loss, mu = self.net(image, label)

        with torch.no_grad():
            mu = mu.mean(dim=0, keepdim=True)
            momentum = settings.EM_MOM
            self.net.module.emau.mu *= momentum
            self.net.module.emau.mu += mu * (1 - momentum)

        loss = loss.mean()
        self.opt.zero_grad()
        #loss = self.crit(pred, label.long())
        loss.backward()
        self.opt.step()

        return loss.item()
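# Note: the no-grad block in train_batch keeps an exponential moving average of
# the EMAU bases: mu <- EM_MOM * mu + (1 - EM_MOM) * batch_mean(mu), so the
# attention bases are updated slowly across batches rather than by gradient
# descent.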
Example #19
train_loader = VCRLoader.from_dataset(train, **loader_params)
val_loader = VCRLoader.from_dataset(val, **loader_params)
test_loader = VCRLoader.from_dataset(test, **loader_params)

ARGS_RESET_EVERY = 100
print("Loading {} for {}".format(params['model'].get('type', 'WTF?'),
                                 'rationales' if args.rationale else 'answer'),
      flush=True)
model = Model.from_params(vocab=train.vocab, params=params['model'])
for submodule in model.detector.backbone.modules():
    if isinstance(submodule, BatchNorm2d):
        submodule.track_running_stats = False
    for p in submodule.parameters():
        p.requires_grad = False

model = DataParallel(model).cuda() if NUM_GPUS > 1 else model.cuda()
optimizer = Optimizer.from_params(
    [x for x in model.named_parameters() if x[1].requires_grad],
    params['trainer']['optimizer'])

lr_scheduler_params = params['trainer'].pop("learning_rate_scheduler", None)
scheduler = LearningRateScheduler.from_params(
    optimizer, lr_scheduler_params) if lr_scheduler_params else None

if os.path.exists(args.folder):
    print("Found folder! restoring", flush=True)
    start_epoch, val_metric_per_epoch = restore_checkpoint(
        model,
        optimizer,
        serialization_dir=args.folder,
        learning_rate_scheduler=scheduler)
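# Note: the loop over model.detector.backbone.modules() above freezes the
# detector backbone: BatchNorm2d layers stop tracking running statistics and
# every backbone parameter gets requires_grad = False, so the optimizer built
# afterwards only receives parameters that still require gradients.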
Example #20
def main():
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, f'fold{args.fold}')
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.submit.txt'), mode='a')

    if args.ema:
        network_path = opj(RESULT_DIR, 'models', args.out_dir,
                           f'fold{args.fold}', f'{args.predict_epoch}_ema.pth')
    else:
        network_path = opj(RESULT_DIR, 'models', args.out_dir,
                           f'fold{args.fold}', f'{args.predict_epoch}.pth')

    submit_out_dir = opj(RESULT_DIR, 'submissions', args.out_dir,
                         f'fold{args.fold}', f'epoch_{args.predict_epoch}')
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(
        submit_out_dir))
    if not ope(submit_out_dir):
        os.makedirs(submit_out_dir)

    # setting up the visible GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    args.augment = args.augment.split(',')
    for augment in args.augment:
        if augment not in augment_list:
            raise ValueError(
                'Unsupported or unknown test augmentation: {}!'.format(
                    augment))

    model_params = {}
    model_params['architecture'] = args.arch
    model = init_network(model_params)

    log.write(">> Loading network:\n>>>> '{}'\n".format(network_path))
    checkpoint = torch.load(network_path)
    model.load_state_dict(checkpoint['state_dict'])
    log.write(">>>> loaded network:\n>>>> epoch {}\n".format(
        checkpoint['epoch']))

    # moving network to gpu and eval mode
    model = DataParallel(model)
    model.cuda()
    model.eval()

    # Data loading code
    dataset = args.dataset
    if dataset == 'test':
        steel_test_df = pd.read_csv(opj('..', 'input',
                                        'sample_submission.csv'))
    elif dataset == 'val':
        steel_test_df = pd.read_csv(
            opj(DATA_DIR, args.split_type, args.split_name,
                f'random_valid_cv{args.fold}.csv'))
    else:
        raise ValueError('Unsupported or unknown dataset: {}!'.format(dataset))

    steel_test_df['ImageId'], steel_test_df['ClassId'] = zip(
        *steel_test_df['ImageId_ClassId'].apply(lambda x: x.split('_')))
    imageId = pd.DataFrame(steel_test_df['ImageId'].unique(),
                           columns=['ImageId'])

    test_dataset = SteelDataset(
        imageId,
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=None,
        return_label=False,
        dataset=args.dataset,
    )
    test_loader = DataLoader(
        test_dataset,
        sampler=SequentialSampler(test_dataset),
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    for augment in args.augment:
        test_loader.dataset.transform = eval('augment_%s' % augment)
        unaugment_func = eval('unaugment_%s' % augment)
        sub_submit_out_dir = opj(submit_out_dir, augment)
        if not ope(sub_submit_out_dir):
            os.makedirs(sub_submit_out_dir)
        with torch.no_grad():
            predict(test_loader,
                    model,
                    sub_submit_out_dir,
                    dataset,
                    args,
                    unaugment_func=unaugment_func)
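
The test loop above swaps test_loader.dataset.transform for each requested test-time augmentation and undoes it on the predictions with a matching unaugment_* function. A minimal sketch of one such pair for horizontal flipping (hypothetical functions that only follow the augment_*/unaugment_* naming convention used above, assuming (C, H, W) tensors):

import torch

def augment_hflip(image):
    # flip the image along the width axis before inference
    return torch.flip(image, dims=[-1])

def unaugment_hflip(pred):
    # flip the prediction back so it lines up with the original image
    return torch.flip(pred, dims=[-1])
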
Example #21
0
class BaseTrainer(object):
    def __init__(self, args):
        self.args = args
        self.set_random_seed(random_seed=args.random_seed)

        self.tokenizer = BertTokenizer.from_pretrained(
            args.bert_model, do_lower_case=args.do_lower_case)
        if args.debug:
            print("Debugging mode on.")
        self.features_lst = self.get_features(self.args.train_folder,
                                              self.args.debug)

    def make_model_env(self, gpu, ngpus_per_node):
        if self.args.distributed:
            self.args.gpu = self.args.devices[gpu]
        else:
            self.args.gpu = 0

        if self.args.use_cuda and self.args.distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            self.args.rank = self.args.rank * ngpus_per_node + gpu
            dist.init_process_group(backend=self.args.dist_backend,
                                    init_method=self.args.dist_url,
                                    world_size=self.args.world_size,
                                    rank=self.args.rank)

        # Load baseline model
        self.model = BertForQuestionAnswering.from_pretrained(
            self.args.bert_model)

        if self.args.load_model is not None:
            print("Loading model from ", self.args.load_model)
            self.model.load_state_dict(
                torch.load(self.args.load_model,
                           map_location=lambda storage, loc: storage))

        max_len = max([len(f) for f in self.features_lst])
        num_train_optimization_steps = math.ceil(
            max_len / self.args.batch_size) * self.args.epochs * len(
                self.features_lst)

        if self.args.freeze_bert:
            for param in self.model.bert.parameters():
                param.requires_grad = False

        self.optimizer = get_opt(list(self.model.named_parameters()),
                                 num_train_optimization_steps, self.args)

        if self.args.use_cuda:
            if self.args.distributed:
                torch.cuda.set_device(self.args.gpu)
                self.model.cuda(self.args.gpu)
                self.args.batch_size = int(self.args.batch_size /
                                           ngpus_per_node)
                self.args.workers = int(
                    (self.args.workers + ngpus_per_node - 1) / ngpus_per_node)
                self.model = DistributedDataParallel(
                    self.model,
                    device_ids=[self.args.gpu],
                    find_unused_parameters=True)
            else:
                self.model.cuda()
                self.model = DataParallel(self.model,
                                          device_ids=self.args.devices)

        cudnn.benchmark = True

    def make_run_env(self):
        if self.args.distributed:
            # distributing dev file evaluation task
            self.dev_files = []
            gpu_num = len(self.args.devices)
            files = os.listdir(self.args.dev_folder)
            for i in range(len(files)):
                if i % gpu_num == self.args.rank:
                    self.dev_files.append(files[i])

            print("GPU {}".format(self.args.gpu), self.dev_files)
        else:
            self.dev_files = os.listdir(self.args.dev_folder)
            print(self.dev_files)

    def get_features(self, train_folder, debug=False):
        pickled_folder = self.args.pickled_folder + "_{}_{}".format(
            self.args.bert_model, str(self.args.skip_no_ans))

        features_lst = []

        files = [f for f in os.listdir(train_folder) if f.endswith(".gz")]
        print("Number of data set:{}".format(len(files)))
        for filename in files:
            data_name = filename.split(".")[0]
            # Check whether pkl file already exists
            pickle_file_name = '{}.pkl'.format(data_name)
            pickle_file_path = os.path.join(pickled_folder, pickle_file_name)
            if os.path.exists(pickle_file_path):
                with open(pickle_file_path, 'rb') as pkl_f:
                    print("Loading {} file as pkl...".format(data_name))
                    features_lst.append(pickle.load(pkl_f))
            else:
                print("processing {} file".format(data_name))
                file_path = os.path.join(train_folder, filename)

                train_examples = read_squad_examples(file_path, debug=debug)

                train_features = convert_examples_to_features(
                    examples=train_examples,
                    tokenizer=self.tokenizer,
                    max_seq_length=self.args.max_seq_length,
                    max_query_length=self.args.max_query_length,
                    doc_stride=self.args.doc_stride,
                    is_training=True,
                    skip_no_ans=self.args.skip_no_ans)

                features_lst.append(train_features)

                # Save feature lst as pickle (For reuse & fast loading)
                if not debug and self.args.rank == 0:
                    with open(pickle_file_path, 'wb') as pkl_f:
                        print("Saving {} file from pkl file...".format(
                            data_name))
                        pickle.dump(train_features, pkl_f)

        return features_lst

    def get_iter(self, features_lst, args):
        all_input_ids = []
        all_input_mask = []
        all_segment_ids = []
        all_start_positions = []
        all_end_positions = []
        all_labels = []

        for i, train_features in enumerate(features_lst):
            all_input_ids.append(
                torch.tensor([f.input_ids for f in train_features],
                             dtype=torch.long))
            all_input_mask.append(
                torch.tensor([f.input_mask for f in train_features],
                             dtype=torch.long))
            all_segment_ids.append(
                torch.tensor([f.segment_ids for f in train_features],
                             dtype=torch.long))

            start_positions = torch.tensor(
                [f.start_position for f in train_features], dtype=torch.long)
            end_positions = torch.tensor(
                [f.end_position for f in train_features], dtype=torch.long)

            all_start_positions.append(start_positions)
            all_end_positions.append(end_positions)
            all_labels.append(i * torch.ones_like(start_positions))

        all_input_ids = torch.cat(all_input_ids, dim=0)
        all_input_mask = torch.cat(all_input_mask, dim=0)
        all_segment_ids = torch.cat(all_segment_ids, dim=0)
        all_start_positions = torch.cat(all_start_positions, dim=0)
        all_end_positions = torch.cat(all_end_positions, dim=0)
        all_labels = torch.cat(all_labels, dim=0)

        train_data = TensorDataset(all_input_ids, all_input_mask,
                                   all_segment_ids, all_start_positions,
                                   all_end_positions, all_labels)
        if args.distributed:
            train_sampler = DistributedSampler(train_data)
            data_loader = DataLoader(train_data,
                                     num_workers=args.workers,
                                     pin_memory=True,
                                     sampler=train_sampler,
                                     batch_size=args.batch_size)
        else:
            weights = make_weights_for_balanced_classes(
                all_labels.detach().cpu().numpy().tolist(),
                self.args.num_classes)
            weights = torch.DoubleTensor(weights)
            train_sampler = torch.utils.data.sampler.WeightedRandomSampler(
                weights, len(weights))
            data_loader = torch.utils.data.DataLoader(
                train_data,
                batch_size=args.batch_size,
                shuffle=None,
                sampler=train_sampler,
                num_workers=args.workers,
                worker_init_fn=self.set_random_seed(self.args.random_seed),
                pin_memory=True,
                drop_last=True)

        return data_loader, train_sampler

    def save_model(self, epoch, loss):
        loss = round(loss, 3)
        model_type = ("adv" if self.args.adv else "base")

        save_file = os.path.join(
            self.args.save_dir,
            "{}_{}_{:.3f}.pt".format(model_type, epoch, loss))
        save_file_config = os.path.join(
            self.args.save_dir,
            "{}_config_{}_{:.3f}.json".format(model_type, epoch, loss))

        model_to_save = self.model.module if hasattr(
            self.model,
            'module') else self.model  # Only save the model it-self

        torch.save(model_to_save.state_dict(), save_file)
        model_to_save.config.to_json_file(save_file_config)

    def train(self, consolidate=True, fisher_estimation_sample_size=1024):
        step = 1
        avg_loss = 0
        global_step = 1
        iter_lst = [self.get_iter(self.features_lst, self.args)]
        num_batches = sum([len(iterator[0]) for iterator in iter_lst])
        for epoch in range(self.args.start_epoch,
                           self.args.start_epoch + self.args.epochs):
            self.model.train()
            start = time.time()
            batch_step = 1
            for data_loader, sampler in iter_lst:
                if self.args.distributed:
                    sampler.set_epoch(epoch)

                for i, batch in enumerate(data_loader, start=1):
                    input_ids, input_mask, seg_ids, start_positions, end_positions, _ = batch

                    # remove unnecessary pad token
                    seq_len = torch.sum(torch.sign(input_ids), 1)
                    max_len = torch.max(seq_len)

                    input_ids = input_ids[:, :max_len].clone()
                    input_mask = input_mask[:, :max_len].clone()
                    seg_ids = seg_ids[:, :max_len].clone()
                    start_positions = start_positions.clone()
                    end_positions = end_positions.clone()

                    if self.args.use_cuda:
                        input_ids = input_ids.cuda(self.args.gpu,
                                                   non_blocking=True)
                        input_mask = input_mask.cuda(self.args.gpu,
                                                     non_blocking=True)
                        seg_ids = seg_ids.cuda(self.args.gpu,
                                               non_blocking=True)
                        start_positions = start_positions.cuda(
                            self.args.gpu, non_blocking=True)
                        end_positions = end_positions.cuda(self.args.gpu,
                                                           non_blocking=True)

                    loss = self.model(input_ids, seg_ids, input_mask,
                                      start_positions, end_positions)
                    loss = loss.mean()
                    loss = loss / self.args.gradient_accumulation_steps

                    ewc_loss = self.model.module.ewc_loss()
                    loss = loss + ewc_loss

                    loss.backward()

                    avg_loss = self.cal_running_avg_loss(
                        loss.item() * self.args.gradient_accumulation_steps,
                        avg_loss)
                    if step % self.args.gradient_accumulation_steps == 0:
                        self.optimizer.step()
                        self.optimizer.zero_grad()

                    if epoch != 0 and i % 2000 == 0:
                        result_dict = self.evaluate_model(i)
                        for dev_file, f1 in result_dict.items():
                            print("GPU/CPU {} evaluated {}: {:.2f}".format(
                                self.args.gpu, dev_file, f1),
                                  end="\n")

                    global_step += 1
                    batch_step += 1
                    msg = "{}/{} {} - ETA : {} - loss: {:.4f}" \
                        .format(batch_step, num_batches, progress_bar(batch_step, num_batches),
                                eta(start, batch_step, num_batches),
                                avg_loss)
                    print(msg, end="\r")

            print("[GPU Num: {}, epoch: {}, Final loss: {:.4f}]".format(
                self.args.gpu, epoch, avg_loss))

            # save model
            if self.args.rank == 0:
                self.save_model(epoch, avg_loss)

            if self.args.do_valid:
                result_dict = self.evaluate_model(epoch)
                for dev_file, f1 in result_dict.items():
                    print("GPU/CPU {} evaluated {}: {:.2f}".format(
                        self.args.gpu, dev_file, f1),
                          end="\n")

        if consolidate:
            # estimate the fisher information of the parameters and consolidate
            # them in the network.
            print(
                '=> Estimating diagonals of the fisher information matrix...',
                flush=True,
                end='',
            )
            # ATTENTION: the data_loader should cover the entire training set!
            self.model.module.consolidate(
                self.model.module.estimate_fisher(
                    data_loader, fisher_estimation_sample_size))
            print('EWC Loaded!')

    def evaluate_model(self, epoch):
        # result directory
        result_file = os.path.join(self.args.result_dir,
                                   "dev_eval_{}.txt".format(epoch))
        fw = open(result_file, "a")
        result_dict = dict()
        for dev_file in self.dev_files:
            file_name = dev_file.split(".")[0]
            prediction_file = os.path.join(
                self.args.result_dir,
                "epoch_{}_{}.json".format(epoch, file_name))
            file_path = os.path.join(self.args.dev_folder, dev_file)
            metrics = eval_qa(self.model,
                              file_path,
                              prediction_file,
                              args=self.args,
                              tokenizer=self.tokenizer,
                              batch_size=self.args.batch_size)
            f1 = metrics["f1"]
            fw.write("{} : {}\n".format(file_name, f1))
            result_dict[dev_file] = f1
        fw.close()

        return result_dict

    @staticmethod
    def cal_running_avg_loss(loss, running_avg_loss, decay=0.99):
        if running_avg_loss == 0:
            return loss
        else:
            running_avg_loss = running_avg_loss * decay + (1 - decay) * loss
            return running_avg_loss

    @staticmethod
    def set_random_seed(random_seed):
        if random_seed is not None:
            print("Set random seed as {}".format(random_seed))
            os.environ['PYTHONHASHSEED'] = str(random_seed)
            random.seed(random_seed)
            np.random.seed(random_seed)
            torch.manual_seed(random_seed)
            torch.cuda.manual_seed_all(random_seed)
            torch.set_num_threads(1)
            cudnn.benchmark = False
            cudnn.deterministic = True
            warnings.warn('You have chosen to seed training. '
                          'This will turn on the CUDNN deterministic setting, '
                          'which can slow down your training considerably! '
                          'You may see unexpected behavior when restarting '
                          'from checkpoints.')
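
The training loop above adds self.model.module.ewc_loss() to the task loss and, once training ends, estimates the diagonal Fisher information and consolidates it in the network. A minimal sketch of the quadratic EWC penalty such a method typically computes, assuming the consolidated parameters and Fisher estimates are kept in plain dicts (theta_star, fisher and lam are illustrative names, not this project's API):

def ewc_penalty(model, theta_star, fisher, lam=1.0):
    """lam / 2 * sum_i F_i * (theta_i - theta_i*) ** 2 over consolidated parameters."""
    penalty = 0.0
    for name, param in model.named_parameters():
        if name in fisher:
            penalty = penalty + (fisher[name] * (param - theta_star[name]) ** 2).sum()
    return 0.5 * lam * penalty
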
Example #22
0
def run():
    args = get_parser()
    with open(args.cfg_path) as f:
        cfg = json.load(f)

    logger = logging.getLogger("test")
    logger.setLevel(logging.DEBUG)
    fileHandler = logging.FileHandler('test_auxiliary.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fileHandler.setFormatter(formatter)
    logger.addHandler(fileHandler)

    dataset = ScoringTestDataset(csv_file=args.test_root,
                                 root_dir=args.base_root)

    summary = {'count': 0, 'correct': 0, 'acc': 0}
    summary_writer = SummaryWriter(comment='TEST_AUXILIARY')

    model_cls = MODELS[cfg['model']](num_nodes=cfg['grid_size'],
                                     use_crf=cfg['use_crf'])
    model_cls = DataParallel(model_cls, device_ids=None)
    checkpoint = torch.load(args.load_path_cls)
    model_cls.load_state_dict(checkpoint['state_dict'])
    model_cls = model_cls.cuda()
    model_cls.eval()

    model_auxiliary = DiscriminatePatch()
    model_auxiliary = DataParallel(model_auxiliary, device_ids=None)
    checkpoint = torch.load(args.load_path_auxiliary)
    model_auxiliary.load_state_dict(checkpoint['state_dict'])
    model_auxiliary = model_auxiliary.cuda()
    model_auxiliary.eval()

    time_now = time.time()
    y_label = []
    y_pred = []

    for iteration, (image_list, image_auxiliary_list, score_list,
                    label) in enumerate(dataset):
        pred_label = []
        auxiliary_score = []
        batch_num = int(len(image_list) / args.batch_size)
        remain = int(len(image_list) % args.batch_size)
        image_cls_score = torch.stack(score_list)
        for index in range(batch_num):
            image_list_set = image_list[index * args.batch_size:(index + 1) *
                                        args.batch_size]
            image_set = torch.stack(image_list_set, 0)
            image_set = image_set.cuda()
            outputs = model_cls(image_set)
            probs = outputs.sigmoid()
            prediction = probs.ge(0.5)
            pred_label.append(prediction.cpu())
            image_auxiliary_list_set = image_auxiliary_list[index *
                                                            args.batch_size:
                                                            (index + 1) *
                                                            args.batch_size]
            image_auxiliary_set = torch.stack(image_auxiliary_list_set, 0)
            image_auxiliary_set = image_auxiliary_set.cuda()
            patch_score = model_auxiliary(image_auxiliary_set)
            patch_score = patch_score.ge(0.5)
            auxiliary_score.append((patch_score.cpu()))
        if remain != 0:
            image_list_set = image_list[batch_num * args.batch_size:]
            image_set = torch.stack(image_list_set, 0)
            image_set = image_set.cuda()
            outputs = model_cls(image_set)
            probs = outputs.sigmoid()
            prediction = probs.ge(0.5)
            image_auxiliary_list_set = image_auxiliary_list[batch_num *
                                                            args.batch_size:]
            image_auxiliary_set = torch.stack(image_auxiliary_list_set, 0)
            image_auxiliary_set = image_auxiliary_set.cuda()
            patch_score = model_auxiliary(image_auxiliary_set)
            patch_score = patch_score.ge(0.5)
            if remain == 1:
                prediction = prediction.unsqueeze(0)
                patch_score = patch_score.unsqueeze(0)
            pred_label.append(prediction.cpu())
            auxiliary_score.append((patch_score.cpu()))

        pred_cls_label = torch.cat(pred_label, dim=0).float()
        patch_auxiliary_score = torch.cat(auxiliary_score, dim=0).float()
        patch_discriminate_num = float(torch.sum(patch_auxiliary_score))
        patch_score = torch.mul(pred_cls_label, image_cls_score)
        score = torch.sum(patch_score, dim=1)
        score = score.unsqueeze(dim=1)
        finally_score = torch.sum(torch.mul(score, patch_auxiliary_score))
        ratio = float(finally_score) / float(patch_discriminate_num)

        if ratio >= 0.5:
            cls_label = torch.ones((1, ), dtype=torch.uint8)
        else:
            cls_label = torch.zeros((1, ), dtype=torch.uint8)
        if torch.equal(cls_label, label):
            summary['correct'] += 1

        logger.info(cls_label)
        logger.info(label)
        logger.info('score: {:.4f} / patch_num: {} = {:.4f}'.format(
            float(finally_score), patch_discriminate_num, ratio))

        summary['count'] += 1
        summary['acc'] = float(summary['correct']) / float(summary['count'])
        summary_writer.add_scalar('test/acc', summary['acc'], summary['count'])
        logger.info(
            '{}, Numbers of all WSI: {}, Number of the correct WSI classification: {}, '
            'Accuracy: {:.4f}'.format(time.strftime("%Y-%m-%d %H:%M:%S"),
                                      summary['count'], summary['correct'],
                                      summary['acc']))
        y_label.append(label.cpu())
        y_pred.append(float(ratio))

    y_label_array = numpy.array(y_label)
    y_pred_array = numpy.array(y_pred)

    fpr, tpr, threshold = metrics.roc_curve(y_true=y_label_array,
                                            y_score=y_pred_array,
                                            pos_label=1)
    auc = metrics.auc(fpr, tpr)
    logger.info('AUC = {:.4f}'.format(auc))
    plt.figure()
    plt.plot(fpr,
             tpr,
             color='darkorange',
             label='ROC curve (area = %0.4f)' % auc)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc='lower right')
    plt.show()
Example #23
0
class UNetTrainer(object):
    """UNet trainer"""
    def __init__(self,
                 start_epoch=0,
                 save_dir='',
                 resume="",
                 devices_num=2,
                 num_classes=2,
                 color_dim=1):

        self.net = UNet(color_dim=color_dim, num_classes=num_classes)
        self.start_epoch = start_epoch if start_epoch != 0 else 1
        self.save_dir = os.path.join('../models', save_dir)
        self.loss = CrossEntropyLoss()
        self.num_classes = num_classes

        if resume:
            checkpoint = torch.load(resume)
            if self.start_epoch == 0:
                self.start_epoch = checkpoint['epoch'] + 1
            if not self.save_dir:
                self.save_dir = checkpoint['save_dir']
            self.net.load_state_dict(checkpoint['state_dir'])

        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)

        self.net.cuda()
        self.loss.cuda()
        if devices_num == 2:
            self.net = DataParallel(self.net, device_ids=[0, 1])
        #self.loss = DataParallel(self.loss, device_ids=[0, 1])

    def train(self,
              train_loader,
              val_loader,
              lr=0.001,
              weight_decay=1e-4,
              epochs=200,
              save_freq=10):

        self.logfile = os.path.join(self.save_dir, 'log')
        sys.stdout = Logger(self.logfile)
        self.epochs = epochs
        self.lr = lr

        optimizer = torch.optim.Adam(
            self.net.parameters(),
            #lr,
            #momentum=0.9,
            weight_decay=weight_decay)

        for epoch in range(self.start_epoch, epochs + 1):
            self.train_(train_loader, epoch, optimizer, save_freq)
            self.validate_(val_loader, epoch)

    def train_(self, data_loader, epoch, optimizer, save_freq=10):
        start_time = time.time()

        self.net.train()
        #lr = self.get_lr(epoch)

        #for param_group in optimizer.param_groups:
        #    param_group['lr'] = lr

        metrics = []

        for i, (data, target) in enumerate(tqdm(data_loader)):
            data_t, target_t = data, target
            data = Variable(data.cuda(non_blocking=True))
            target = Variable(target.cuda(non_blocking=True))

            output = self.net(data)  # UNet forward output

            output = output.transpose(1, 3).transpose(1, 2).contiguous().view(
                -1, self.num_classes)
            target = target.view(-1)
            loss_output = self.loss(output, target)

            optimizer.zero_grad()
            loss_output.backward()  # backpropagate the loss
            optimizer.step()

            loss_output = loss_output.item()  # scalar loss value
            acc = accuracy(output, target)
            metrics.append([loss_output, acc])

            if i == 0:
                batch_size = data.size(0)
                _, output = output.data.max(dim=1)
                output = output.view(batch_size, 1, 1, 320, 480).cpu()  # predicted mask
                data_t = data_t[0, 0].unsqueeze(0).unsqueeze(0)  # original image
                target_t = target_t[0].unsqueeze(0)  # ground-truth mask
                t = torch.cat([output[0].float(), data_t,
                               target_t.float()], 0)  # concat the 3 images (pred, input, gt)
                #show_list = []
                #for j in range(10):
                #    show_list.append(data_t[j, 0].unsqueeze(0).unsqueeze(0))
                #    show_list.append(target_t[j].unsqueeze(0))
                #    show_list.append(output[j].float())
                #
                #t = torch.cat(show_list, 0)
                torchvision.utils.save_image(t,
                                             "temp_image/%02d_train.jpg" %
                                             epoch,
                                             nrow=3)

            #if i == 20:
            #    break

        if epoch % save_freq == 0:
            if 'module' in dir(self.net):
                state_dict = self.net.module.state_dict()
            else:
                state_dict = self.net.state_dict()

            for key in state_dict.keys():
                state_dict[key] = state_dict[key].cpu()

            torch.save(
                {
                    'epoch': epoch,
                    'save_dir': self.save_dir,
                    'state_dir': state_dict
                }, os.path.join(self.save_dir, '%03d.ckpt' % epoch))

        end_time = time.time()

        metrics = np.asarray(metrics, np.float32)
        self.print_metrics(metrics, 'Train', end_time - start_time, epoch)

    def validate_(self, data_loader, epoch):
        start_time = time.time()

        self.net.eval()
        metrics = []
        for i, (data, target) in enumerate(data_loader):
            data_t, target_t = data, target
            data = Variable(data.cuda(non_blocking=True), volatile=True)
            target = Variable(target.cuda(non_blocking=True), volatile=True)

            output = self.net(data)
            output = output.transpose(1, 3).transpose(1, 2).contiguous().view(
                -1, self.num_classes)
            target = target.view(-1)
            loss_output = self.loss(output, target)

            loss_output = loss_output.item()
            acc = accuracy(output, target)
            metrics.append([loss_output, acc])

            if i == 0:
                batch_size = data.size(0)
                _, output = output.data.max(dim=1)
                output = output.view(batch_size, 1, 1, 320, 480).cpu()
                data_t = data_t[0, 0].unsqueeze(0).unsqueeze(0)
                target_t = target_t[0].unsqueeze(0)
                t = torch.cat([output[0].float(), data_t, target_t.float()], 0)
                #    show_list = []
                #    for j in range(10):
                #        show_list.append(data_t[j, 0].unsqueeze(0).unsqueeze(0))
                #        show_list.append(target_t[j].unsqueeze(0))
                #        show_list.append(output[j].float())
                #
                #    t = torch.cat(show_list, 0)
                torchvision.utils.save_image(t,
                                             "temp_image/%02d_val.jpg" % epoch,
                                             nrow=3)
            #if i == 10:
            #    break

        end_time = time.time()

        metrics = np.asarray(metrics, np.float32)
        self.print_metrics(metrics, 'Validation', end_time - start_time)

    def print_metrics(self, metrics, phase, time, epoch=-1):
        """metrics: [loss, acc]
        """
        if epoch != -1:
            print("Epoch: {}".format(epoch))
        print(phase)
        print('loss %2.4f, accuracy %2.4f, time %2.2f' %
              (np.mean(metrics[:, 0]), np.mean(metrics[:, 1]), time))
        if phase != 'Train':
            print()

    def get_lr(self, epoch):
        if epoch <= self.epochs * 0.5:
            lr = self.lr
        elif epoch <= self.epochs * 0.8:
            lr = 0.1 * self.lr
        else:
            lr = 0.01 * self.lr
        return lr

    def save_py_files(self, path):
        """copy .py files in exps dir, cfgs dir and current dir into
           save_dir, and keep the files structure
        """
        #exps dir
        pyfiles = [f for f in os.listdir(path) if f.endswith('.py')]
        path = "/".join(path.split('/')[-2:])
        exp_save_path = os.path.join(self.save_dir, path)
        mkdir(exp_save_path)
        for f in pyfiles:
            shutil.copy(os.path.join(path, f), os.path.join(exp_save_path, f))
        #current dir
        pyfiles = [f for f in os.listdir('./') if f.endswith('.py')]
        for f in pyfiles:
            shutil.copy(f, os.path.join(self.save_dir, f))
        #cfgs dir
        shutil.copytree('./cfgs', os.path.join(self.save_dir, 'cfgs'))
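
Both train_ and validate_ above log an accuracy(output, target) value next to the loss, computed on flattened (num_pixels, num_classes) logits and (num_pixels,) labels. The helper itself is not shown; a minimal per-pixel accuracy consistent with how it is called might look like this:

def accuracy(output, target):
    # output: (num_pixels, num_classes) logits, target: (num_pixels,) class ids
    pred = output.argmax(dim=1)
    return (pred == target).float().mean().item()
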
Example #24
0
def main():
    weight_path = config.weights + config.model_name + os.sep + config.description + os.sep + str(
        config.fold) + os.sep
    if not os.path.exists(weight_path):
        os.makedirs(weight_path)
    log_path = config.logs + config.model_name + os.sep + config.description + os.sep + str(
        config.fold) + os.sep
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    submit_path = config.submit + config.model_name + os.sep + config.description + os.sep + str(
        config.fold) + os.sep
    if not os.path.exists(submit_path):
        os.makedirs(submit_path)

    config.write_to_log(log_path + os.sep + 'log.txt')

    #dataset preparing
    train_dataset = customDataset(config.train_data, train=True)
    val_dataset = customDataset(config.test_data, train=True)
    train_loader = DataLoader(train_dataset,
                              batch_size=config.batch_size,
                              shuffle=True,
                              pin_memory=True)
    val_loader = DataLoader(val_dataset,
                            batch_size=config.batch_size * 2,
                            shuffle=False,
                            pin_memory=False)
    #model preparing
    model = get_net(config.num_classes)
    model = DataParallel(model.cuda(), device_ids=config.gpus)
    model.train()
    #optimizer preparing
    optimizer = optim.Adam(model.parameters(),
                           lr=config.lr,
                           amsgrad=True,
                           weight_decay=config.weight_decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    #loss preparing
    #criterion = nn.CrossEntropyLoss().cuda()
    criterion = FocalLoss(config.num_classes).cuda()

    train_loss = AverageMeter()
    train_top1 = AverageMeter()
    valid_loss = [np.inf, 0, 0]
    best_precision = 0

    for epoch in range(config.epochs):
        scheduler.step(epoch)
        train_progressor = ProgressBar(log_path,
                                       mode="Train",
                                       epoch=epoch,
                                       total_epoch=config.epochs,
                                       model_name=config.model_name,
                                       total=len(train_loader))
        for index, (data, label) in enumerate(train_loader):
            train_progressor.current = index
            data = Variable(data).cuda()
            label = Variable(torch.from_numpy(np.asarray(label))).cuda()

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()

            precision1_train, precision2_train = accuracy(output,
                                                          label,
                                                          topk=(1, 2))
            train_loss.update(loss.item(), data.size(0))
            train_top1.update(precision1_train[0], data.size(0))
            train_progressor.current_loss = train_loss.avg
            train_progressor.current_top1 = train_top1.avg
            train_progressor()
            #print('train epoch %d iteration %d: loss: %.3f' % (epoch + 1, index + 1, loss.data))
        train_progressor.done()
        val_loss, val_top1 = evaluate(epoch, model, val_loader, criterion,
                                      log_path)
        is_best = val_top1 > best_precision
        #print(bool(is_best))
        best_precision = max(val_top1, best_precision)
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "model_name": config.model_name,
                "state_dict": model.state_dict(),
                "best_precision1": best_precision,
                "optimizer": optimizer.state_dict(),
                "fold": config.fold,
                "valid_loss": valid_loss,
            }, is_best, weight_path, log_path, epoch)
Example #25
0
def run():
    args = get_parser()
    with open(args.cfg_path) as f:
        cfg = json.load(f)

    logger = logging.getLogger("test")
    logger.setLevel(logging.DEBUG)
    fileHandler = logging.FileHandler('test_lr_auc.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fileHandler.setFormatter(formatter)
    logger.addHandler(fileHandler)

    dataset = ImageTestDataset(csv_file=args.test_root,
                               root_dir=args.base_root)

    model_cnn = MODELS[cfg['model']](num_nodes=cfg['grid_size'],
                                     use_crf=cfg['use_crf'])
    model_cnn = DataParallel(model_cnn, device_ids=None)
    checkpoint_cnn = torch.load(args.load_path_cnn)
    model_cnn.load_state_dict(checkpoint_cnn['state_dict'])
    model_cnn = model_cnn.cuda()
    model_cnn.eval()
    model_lr = LogistRegression()
    model_lr = DataParallel(model_lr, device_ids=None)
    checkpoint_lr = torch.load(args.load_path_lr)
    model_lr.load_state_dict(checkpoint_lr['state_dict'])
    model_lr.cuda()
    model_lr.eval()

    summary = {'count': 0, 'correct': 0, 'acc': 0}
    y_pred = []
    y_label = []

    for iteration, (image_list, label) in enumerate(dataset):
        time_now = time.time()
        pred_label = []
        label = label.cuda()
        batch_num = int(len(image_list) / args.batch_size)
        remain = int(len(image_list) % args.batch_size)
        for index in range(batch_num):
            image_list_set = image_list[index * args.batch_size:(index + 1) *
                                        args.batch_size]
            image_set = torch.stack(image_list_set, 0)
            image_set = image_set.cuda()
            outputs = model_cnn(image_set)
            probs = outputs.sigmoid()
            prediction = probs.ge(0.5)
            pred_label.append(prediction.cpu())
        if remain != 0:
            image_list_set = image_list[batch_num * args.batch_size:]
            image_set = torch.stack(image_list_set, 0)
            image_set = image_set.cuda()
            outputs = model_cnn(image_set)
            probs = outputs.sigmoid()
            prediction = probs.ge(0.5)
            if remain == 1:
                prediction = prediction.unsqueeze(0)
            pred_label.append(prediction.cpu())

        pred_cls_label = torch.cat(pred_label, dim=0)
        grid_num_sum = pred_cls_label.size()[0] * pred_cls_label.size()[1]
        score = torch.sum(pred_cls_label)

        number_A = int(score)
        number_B = grid_num_sum - number_A

        A = torch.full((1, ), number_A)
        B = torch.full((1, ), number_B)
        histogram = torch.cat((A, B), dim=0)

        output = model_lr(histogram)
        pred_cls = output.ge(0.5)
        summary['count'] += 1
        if torch.equal(pred_cls, label):
            summary['correct'] += 1
        time_spent = time.time() - time_now
        summary['acc'] = float(summary['correct']) / float(summary['count'])
        logger.info(
            '{}, Numbers of all WSI: {}, Number of the correct WSI classification: {}, '
            'Accuracy: {:.4f}, Running time: {:.4f}'.format(
                time.strftime("%Y-%m-%d %H:%M:%S"), summary['count'],
                summary['correct'], summary['acc'], time_spent))

        y_pred.append(float(output.cpu()))
        y_label.append(label.cpu())

    y_pred_array = numpy.array(y_pred)
    y_label_array = numpy.array(y_label)

    fpr, tpr, threshold = metrics.roc_curve(y_true=y_label_array,
                                            y_score=y_pred_array,
                                            pos_label=1)
    auc = metrics.auc(fpr, tpr)
    logger.info('AUC = {:.4f}'.format(auc))
    plt.figure()
    plt.plot(fpr,
             tpr,
             color='darkorange',
             label='ROC curve (area = %0.4f)' % auc)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc='lower right')
    plt.show()
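
The script above reduces the per-grid predictions of a whole slide to a two-bin histogram (positive grid count, negative grid count) and feeds it to a LogistRegression model that is loaded from a checkpoint but never defined here. A minimal sketch of a module with the interface implied by model_lr(histogram) and output.ge(0.5) (an assumption, not the project's actual class):

import torch
import torch.nn as nn

class LogistRegression(nn.Module):
    """Two-bin grid histogram in, slide-level probability out."""
    def __init__(self, in_features=2):
        super().__init__()
        self.fc = nn.Linear(in_features, 1)

    def forward(self, x):
        # x: (2,) or (batch, 2) counts of positive / negative grids
        return torch.sigmoid(self.fc(x))
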
Example #26
0
    def trainer(self,
                train_path,
                val_path,
                cur_fold,
                output_dir=None,
                log_dir=None,
                optimizer='Adam',
                loss_fun='Cross_Entropy',
                class_weight=None,
                lr_scheduler=None):

        torch.manual_seed(1000)
        np.random.seed(1000)
        torch.cuda.manual_seed_all(1000)
        print('Device:{}'.format(self.device))
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True


        output_dir = os.path.join(output_dir, "fold" + str(cur_fold))
        log_dir = os.path.join(log_dir, "fold" + str(cur_fold))

        if os.path.exists(log_dir):
            if not self.pre_trained:
                shutil.rmtree(log_dir)
                os.makedirs(log_dir)
        else:
            os.makedirs(log_dir)

        if os.path.exists(output_dir):
            if not self.pre_trained:
                shutil.rmtree(output_dir)
                os.makedirs(output_dir)
        else:
            os.makedirs(output_dir)
        self.step_pre_epoch = len(train_path) // self.batch_size
        self.writer = SummaryWriter(log_dir)
        self.global_step = self.start_epoch * math.ceil(
            len(train_path[0]) / self.batch_size)

        net = self.net

        # only for deeplab
        if self.freeze is not None and 'deeplab' in self.net_name:
            if self.freeze == 'backbone':
                net.freeze_backbone()
            elif self.freeze == 'classifier':
                net.freeze_classifier()

        lr = self.lr
        loss = self._get_loss(loss_fun, class_weight)

        if len(self.device.split(',')) > 1:
            net = DataParallel(net)

        # dataloader setting
        if self.mode == 'cls':
            train_transformer = transforms.Compose([
                Trunc_and_Normalize(self.scale),
                CropResizeHalf(dim=self.input_shape,
                               num_class=self.num_classes,
                               crop=self.crop),
                RandomEraseHalf(scale_flag=False),
                RandomDistortHalf(),
                RandomTranslationRotationZoomHalf(num_class=self.num_classes),
                RandomFlipHalf(mode='hv'),
                RandomAdjustHalf(),
                To_Tensor(num_class=self.num_classes)
            ])
        else:
            train_transformer = transforms.Compose([
                Trunc_and_Normalize(self.scale),
                CropResizeHalf(dim=self.input_shape,
                               num_class=self.num_classes,
                               crop=self.crop),
                RandomEraseHalf(scale_flag=False),
                RandomDistortHalf(),
                RandomTranslationRotationZoomHalf(num_class=self.num_classes),
                # RandomFlipHalf(mode='hv'),
                # RandomAdjustHalf(),
                RandomNoiseHalf(),
                To_Tensor(num_class=self.num_classes)
            ])
        train_dataset = DataGenerator(train_path,
                                      roi_number=self.roi_number,
                                      num_class=self.num_classes,
                                      transform=train_transformer,
                                      seq_len=self.seq_len)

        train_loader = DataLoader(train_dataset,
                                  batch_size=self.batch_size,
                                  shuffle=True,
                                  num_workers=self.num_workers,
                                  pin_memory=True)

        # copy to gpu
        net = net.cuda()
        loss = loss.cuda()

        # optimizer setting
        optimizer = self._get_optimizer(optimizer, net, lr)
        if self.pre_trained and self.ckpt_point:
            checkpoint = torch.load(self.weight_path)
            optimizer.load_state_dict(checkpoint['optimizer'])

        if lr_scheduler is not None:
            lr_scheduler = self._get_lr_scheduler(lr_scheduler, optimizer)

        # loss_threshold = 1.0
        early_stopping = EarlyStopping(patience=20,
                                       verbose=True,
                                       monitor='val_loss',
                                       op_type='min')

        for epoch in range(self.start_epoch, self.n_epoch):
            train_loss, train_dice, train_acc = self._train_on_epoch(epoch, net, loss, optimizer, train_loader)

            val_loss, val_dice, val_acc = self._val_on_epoch(epoch, net, loss, val_path)

            if lr_scheduler is not None:
                lr_scheduler.step(val_loss)

            torch.cuda.empty_cache()
            print('epoch:{},train_loss:{:.5f},val_loss:{:.5f}'.format(epoch, train_loss, val_loss))

            print('epoch:{},train_dice:{:.5f},val_dice:{:.5f}'.format(epoch, train_dice, val_dice))

            self.writer.add_scalars('data/loss', {
                'train': train_loss,
                'val': val_loss
            }, epoch)
            self.writer.add_scalars('data/dice', {
                'train': train_dice,
                'val': val_dice
            }, epoch)
            self.writer.add_scalars('data/acc', {
                'train': train_acc,
                'val': val_acc
            }, epoch)
            self.writer.add_scalar('data/lr', optimizer.param_groups[0]['lr'],epoch)

            early_stopping(val_loss)
            #save
            if val_loss <= self.loss_threshold:
                self.loss_threshold = val_loss

                if len(self.device.split(',')) > 1:
                    state_dict = net.module.state_dict()
                else:
                    state_dict = net.state_dict()

                saver = {
                    'epoch': epoch,
                    'save_dir': output_dir,
                    'state_dict': state_dict,
                    'optimizer': optimizer.state_dict()
                }

                file_name = 'epoch:{}-train_loss:{:.5f}-train_dice:{:.5f}-train_acc:{:.5f}-val_loss:{:.5f}-val_dice:{:.5f}-val_acc:{:.5f}.pth'.format(
                    epoch, train_loss, train_dice, train_acc, val_loss,
                    val_dice, val_acc)
                
                save_path = os.path.join(output_dir, file_name)
                print("Save as %s" % file_name)

                torch.save(saver, save_path)
            
            if early_stopping.early_stop:
                print('Early Stopping!')
                break

        self.writer.close()
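
The trainer above drives its stop condition through an EarlyStopping(patience=20, verbose=True, monitor='val_loss', op_type='min') helper that is not shown. A minimal sketch of a class with that interface (an assumption, not the project's actual implementation):

class EarlyStopping:
    """Set early_stop once the monitored value stops improving for `patience` epochs."""
    def __init__(self, patience=20, verbose=False, monitor='val_loss', op_type='min'):
        self.patience = patience
        self.verbose = verbose
        self.monitor = monitor
        self.sign = 1.0 if op_type == 'min' else -1.0
        self.best = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, value):
        score = self.sign * value
        if self.best is None or score < self.best:
            self.best = score
            self.counter = 0
        else:
            self.counter += 1
            if self.verbose:
                print('EarlyStopping counter: {}/{}'.format(self.counter, self.patience))
            if self.counter >= self.patience:
                self.early_stop = True
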
Example #27
0
def train(args):
    print('start training...')
    model, model_file = create_model(args)
    #model = model.cuda()
    if torch.cuda.device_count() > 1:
        model_name = model.name
        model = DataParallel(model)
        model.name = model_name
    model = model.cuda()

    if args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=0.0001)
    else:
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=0.9,
                              weight_decay=0.0001)

    if args.lrs == 'plateau':
        lr_scheduler = ReduceLROnPlateau(optimizer,
                                         mode='max',
                                         factor=args.factor,
                                         patience=args.patience,
                                         min_lr=args.min_lr)
    else:
        lr_scheduler = CosineAnnealingLR(optimizer,
                                         args.t_max,
                                         eta_min=args.min_lr)
    #ExponentialLR(optimizer, 0.9, last_epoch=-1) #CosineAnnealingLR(optimizer, 15, 1e-7)

    if args.balanced:
        _, val_loader = get_balanced_train_val_loaders(
            num_classes=args.num_classes,
            start_index=args.start_index,
            batch_size=args.batch_size,
            val_batch_size=args.val_batch_size,
            val_num=args.val_num,
            other=args.other)
    else:
        _, val_loader = get_train_val_loaders(
            num_classes=args.num_classes,
            start_index=args.start_index,
            batch_size=args.batch_size,
            val_batch_size=args.val_batch_size,
            val_num=args.val_num,
            other=args.other)

    best_top1_acc = 0.

    print(
        'epoch |    lr    |      %        |  loss  |  avg   |  loss  |  top1  | top10  |  best  | time |  save |'
    )

    if not args.no_first_val:
        top10_acc, best_top1_acc, total_loss = validate(
            args, model, val_loader)
        print(
            'val   |          |               |        |        | {:.4f} | {:.4f} | {:.4f} | {:.4f} |      |       |'
            .format(total_loss, best_top1_acc, top10_acc, best_top1_acc))

    if args.val:
        return

    model.train()

    if args.lrs == 'plateau':
        lr_scheduler.step(best_top1_acc)
    else:
        lr_scheduler.step()
    train_iter = 0

    for epoch in range(args.start_epoch, args.epochs):
        if args.balanced:
            train_loader, val_loader = get_balanced_train_val_loaders(
                num_classes=args.num_classes,
                start_index=args.start_index,
                batch_size=args.batch_size,
                dev_mode=args.dev_mode,
                val_batch_size=args.val_batch_size,
                val_num=args.val_num,
                other=args.other)
        else:
            train_loader, val_loader = get_train_val_loaders(
                num_classes=args.num_classes,
                start_index=args.start_index,
                batch_size=args.batch_size,
                dev_mode=args.dev_mode,
                val_batch_size=args.val_batch_size,
                val_num=args.val_num,
                other=args.other)

        train_loss = 0

        current_lr = get_lrs(
            optimizer)  #optimizer.state_dict()['param_groups'][2]['lr']
        bg = time.time()
        for batch_idx, data in enumerate(train_loader):
            train_iter += 1
            img, target = data
            img, target = img.cuda(), target.cuda()
            loss = model(img, target).sum() / img.size(0)
            #loss = criterion(args, output, target)
            #(img.size(0) * loss).backward()
            loss.backward()

            optimizer.step()
            optimizer.zero_grad()

            train_loss += loss.item()
            print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]), args.batch_size * (batch_idx + 1),
                train_loader.num, loss.item(), train_loss / (batch_idx + 1)),
                  end='')

            if train_iter > 0 and train_iter % args.iter_val == 0:
                if isinstance(model, DataParallel):
                    torch.save(model.module.state_dict(),
                               model_file + '_latest')
                else:
                    torch.save(model.state_dict(), model_file + '_latest')

                top10_acc, top1_acc, total_loss = validate(
                    args, model, val_loader)

                _save_ckp = ''
                if args.always_save or top1_acc > best_top1_acc:
                    best_top1_acc = top1_acc
                    if isinstance(model, DataParallel):
                        torch.save(model.module.state_dict(), model_file)
                    else:
                        torch.save(model.state_dict(), model_file)
                    _save_ckp = '*'
                print(' {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.2f} |  {:4s} |'.
                      format(total_loss, top1_acc, top10_acc, best_top1_acc,
                             (time.time() - bg) / 60, _save_ckp))

                model.train()

                if args.lrs == 'plateau':
                    lr_scheduler.step(top1_acc)
                else:
                    lr_scheduler.step()
                current_lr = get_lrs(optimizer)
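
The checkpointing code above repeats the same isinstance(model, DataParallel) check before every torch.save. A small helper that captures that pattern (a sketch, not part of the original script):

import torch
from torch.nn import DataParallel

def save_weights(model, path):
    # always save the underlying module's weights so the checkpoint loads
    # the same way whether or not the model is wrapped in DataParallel
    state = model.module.state_dict() if isinstance(model, DataParallel) else model.state_dict()
    torch.save(state, path)
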
Example #28
0

# 1. torch.Size([8, 1, 38, 252])
# 2. torch.Size([8, 50, 34, 228])
# 3. torch.Size([8, 50, 34, 76])
# 4. torch.Size([8, 50, 32, 72])
# 5. torch.Size([8, 50, 32, 24])
# 6. torch.Size([8, 38400])
# 7. torch.Size([8, 19200])
# 8. torch.Size([8, 9600])

# In[7]:

net, loss = get_model()
net = DataParallel(net)
net = net.cuda()
loss = loss.cuda()

# In[29]:

data_dir = 'someprocesseddata'
dataset = data_loader(data_dir,
                      win_width,
                      kernel_size,
                      overlap=True,
                      phase='train')
train_loader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=n_workers,
)
Example #29
0
def run(config):
    from datasets import myDataset
    config, model, loss, warp, trainer, train_data, val_data, train_loader, val_loader = prepare(
        config)
    # print(model)

    # data, gt_prob_fpn, gt_coord_prob_fpn, gt_coord_diff_fpn, gt_diff_fpn, gt_connects_fpn, self.cases[idx] = train_data[0]
    # print(data.shape)
    # exit()

    if config.test:
        print('Start testing')
        #if hasattr(model, 'test'):
        #    model.forward = model.test
        model = DataParallel(model.cuda())

        tester = Tester(model, config)
        val_data = myDataset(config, 'test')
        test_loader = DataLoader(val_data,
                                 batch_size=1,
                                 shuffle=False,
                                 num_workers=3,
                                 pin_memory=True,
                                 collate_fn=lambda x: x)
        tester.test(test_loader)
        return
    elif config.val:
        print('Start Val')
        start_epoch = config.train['start_epoch']
        trainer.validate(start_epoch, val_loader, save=True)
    else:
        start_epoch = config.train['start_epoch']
        epoch = config.train["epoch"]
        print('Start training from %d-th epoch' % start_epoch)

        epoch2loss = {}
        for i in range(start_epoch, epoch + 1):
            try:
                # hard example mining (resample when enabled in the config)
                if 'hardmining' in config.prepare and config.prepare[
                        'hardmining']:
                    train_loader.dataset.resample3()
                    json.dump(
                        [str(item) for item in train_loader.dataset.samples],
                        open(
                            os.path.join(trainer.save_dir,
                                         'sample_%d.json' % (i)), 'w'),
                        indent=2)
                    json.dump(
                        {
                            k: str(v)
                            for k, v in
                            train_loader.dataset.sample_weights.items()
                        },
                        open(
                            os.path.join(trainer.save_dir,
                                         'sample_weights_%d.json' % (i)), 'w'),
                        indent=2)
                    #json.dump({k: str(v) for k, v in train_loader.dataset.neg_sample_weights.items()}, open(os.path.join(trainer.save_dir, 'neg_sample_weights_%d.json'%(i)), 'w'), indent=2)
                loss_list = trainer.train(i, train_loader)
                epoch2loss[i] = list(loss_list)
                trainer.validate(i, val_loader)
            except KeyboardInterrupt as e:
                traceback.print_exc()
                trainer.ioer.save_file(trainer.net,
                                       i,
                                       trainer.args,
                                       1e10,
                                       isbreak=True)
                sys.exit(0)

        print(epoch2loss)
        with open('./epoch_loss.json', 'w') as f:
            f.write(json.dumps(epoch2loss))
Example #30
0
def run(args):
    with open(args.cnn_path, 'r') as f:
        cnn = json.load(f)

    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)

    with open(os.path.join(args.save_path, 'cnn.json'), 'w') as f:
        json.dump(cnn, f, indent=1)

    os.environ["CUDA_VISIBLE_DEVICES"] = args.device_ids
    num_GPU = len(args.device_ids.split(','))
    batch_size_train = cnn['batch_size'] * num_GPU
    batch_size_valid = cnn['batch_size'] * num_GPU
    num_workers = args.num_workers * num_GPU

    model = chose_model(cnn)
    fc_features = model.fc.in_features
    model.fc = nn.Linear(fc_features, 1)  # note: single-logit output head
    model = DataParallel(model, device_ids=None)
    model = model.cuda()
    loss_fn = BCEWithLogitsLoss().cuda()
    optimizer = SGD(model.parameters(), lr=cnn['lr'], momentum=cnn['momentum'])

    # dataset_train = ImageFolder(cnn['data_path_train'])
    # dataset_valid = ImageFolder(cnn['data_path_valid'])
    dataset_train = ImageDataset(cnn['data_path_train'],
                                 cnn['image_size'],
                                 cnn['crop_size'],
                                 cnn['normalize'])
    dataset_valid = ImageDataset(cnn['data_path_valid'],
                                 cnn['image_size'],
                                 cnn['crop_size'],
                                 cnn['normalize'])

    dataloader_train = DataLoader(dataset_train,
                                  batch_size=batch_size_train,
                                  num_workers=num_workers)
    dataloader_valid = DataLoader(dataset_valid,
                                  batch_size=batch_size_valid,
                                  num_workers=num_workers)

    summary_train = {'epoch': 0, 'step': 0}
    summary_valid = {'loss': float('inf'), 'acc': 0}
    summary_writer = SummaryWriter(args.save_path)
    loss_valid_best = float('inf')
    for epoch in range(cnn['epoch']):
        summary_train = train_epoch(summary_train, summary_writer, cnn, model,
                                    loss_fn, optimizer,
                                    dataloader_train)

        torch.save({'epoch': summary_train['epoch'],
                    'step': summary_train['step'],
                    'state_dict': model.module.state_dict()},
                   os.path.join(args.save_path, 'train.ckpt'))

        time_now = time.time()
        summary_valid = valid_epoch(summary_valid, model, loss_fn,
                                    dataloader_valid)
        time_spent = time.time() - time_now

        logging.info('{}, Epoch: {}, step: {}, Validation Loss: {:.5f}, '
                     'Validation ACC: {:.3f}, Run Time: {:.2f}'
                     .format(time.strftime("%Y-%m-%d %H:%M:%S"), summary_train['epoch'],
                             summary_train['step'], summary_valid['loss'],
                             summary_valid['acc'], time_spent))

        summary_writer.add_scalar('valid/loss',
                                  summary_valid['loss'], summary_train['step'])
        summary_writer.add_scalar('valid/acc',
                                  summary_valid['acc'], summary_train['step'])

        if summary_valid['loss'] < loss_valid_best:
            loss_valid_best = summary_valid['loss']

            torch.save({'epoch': summary_train['epoch'],
                        'step': summary_train['step'],
                        'state_dict': model.module.state_dict()},
                       os.path.join(args.save_path, 'best.ckpt'))

    summary_writer.close()
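
The checkpoints above store model.module.state_dict(), i.e. the weights of the bare network without the DataParallel wrapper. A minimal sketch of loading such a checkpoint back for inference (chose_model, cnn and the file name mirror the script above):

import torch
import torch.nn as nn

ckpt = torch.load('best.ckpt', map_location='cpu')
model = chose_model(cnn)                       # rebuild the bare network as above
model.fc = nn.Linear(model.fc.in_features, 1)  # same single-logit head as above
model.load_state_dict(ckpt['state_dict'])      # load the unwrapped weights
model.eval()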